Search code examples
r ggplot2 fill geom-bar

order and fill with 2 different variables geom_bar ggplot2 R


I have a question concerning the fill field in geom_bar of the ggplot2 package.

I would like to fill my geom_bar with one variable (called var_fill in the example below) but order the stacked segments of the geom_bar by another variable (called clarity in the example).

How can I do that?

Thank you very much!

The example:

# NOTE: rm(list = ls()) wipes the global environment; skip this line when
# pasting the example into a session whose objects you want to keep.
rm(list = ls())

set.seed(1)

# `diamonds` is a dataset shipped with ggplot2, so ggplot2 has to be attached
# before the pipeline below can find it.
library(ggplot2)
library(dplyr)

# One row per (cut, clarity) combination with its row count, plus a random
# A/B/C label per row that is used later as the fill variable.
data_ex <- diamonds %>%
  group_by(cut, clarity) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  # n() instead of a hard-coded 40 keeps the sample length tied to the data.
  mutate(var_fill = LETTERS[sample.int(3, n(), replace = TRUE)])

head(data_ex)

# A tibble: 6 x 4
   cut  clarity count var_fill
  <ord>   <ord> <int>    <chr>
1  Fair      I1   210        A
2  Fair     SI2   466        B
3  Fair     SI1   408        B
4  Fair     VS2   261        C
5  Fair     VS1   170        A
6  Fair    VVS2    69        C

I would like this order of the boxes [clarity] :

library(ggplot2)

# Proportion-scaled stacked bars, one per cut; the segments are coloured by
# clarity and outlined in black.
ggplot(data_ex, aes(x = cut, y = count, fill = clarity)) +
  geom_bar(stat = "identity", position = "fill", color = "black")

enter image description here

with this fill (color) of the boxes [var_fill] :

# Same proportion-scaled stacked bars, but the segments are coloured by
# var_fill instead of clarity.
ggplot(data_ex, aes(x = cut, y = count, fill = var_fill)) +
  geom_bar(stat = "identity", position = "fill", color = "black")

enter image description here

EDIT1 : answer found by missuse :

# p1: group = clarity controls how the segments are split, while
# fill = var_fill controls their colour.
p1 <- ggplot(data_ex, aes(x = cut, y = count)) +
  geom_bar(
    aes(group = clarity, fill = var_fill),
    stat = "identity", position = "fill", color = "black"
  ) +
  ggtitle("var fill")

# p2: reference plot where clarity is used for the fill itself.
p2 <- ggplot(data_ex, aes(x = cut, y = count)) +
  geom_bar(
    aes(fill = clarity),
    stat = "identity", position = "fill", color = "black"
  ) +
  ggtitle("clarity")

# Show both plots side by side.
library(cowplot)
cowplot::plot_grid(p1, p2)

enter image description here

EDIT2 : Now I have tried to do this with the ggmosaic extension, with the help of missuse

# NOTE: rm(list = ls()) wipes the global environment; skip this line when
# pasting the example into a session whose objects you want to keep.
rm(list = ls())
set.seed(1)
library(ggplot2)
library(dplyr)
library(ggmosaic)

# One row per (cut, clarity) combination with its row count, a random
# "residual" drawn uniformly from [-4.5, 5], and two columns derived from
# that residual: a class label (residu_classe) and a hex colour
# (residu_color). Both case_when() calls use the same cut points, so each
# label always pairs with the same colour.
data_ex <- diamonds %>%
  group_by(cut, clarity) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  mutate(residu = runif(nrow(.), min = -4.5, max = 5)) %>%
  mutate(
    residu_classe = case_when(
      residu < -4 ~ "< -4 (p<0.001)",
      (residu >= -4 & residu < -2) ~ "[-4;-2[ (p<0.05)",
      (residu >= -2 & residu < 2) ~ "[-2;2[ non significatif",
      (residu >= 2 & residu < 4) ~ "[2;4[ (p<0.05)",
      residu >= 4 ~ ">= 4 (p<0.001)"
    )
  ) %>%
  mutate(
    residu_color = case_when(
      residu < -4 ~ "#D04864",
      (residu >= -4 & residu < -2) ~ "#E495A5",
      (residu >= -2 & residu < 2) ~ "#CCCCCC",
      (residu >= 2 & residu < 4) ~ "#9DA8E2",
      residu >= 4 ~ "#4A6FE3"
    )
  )


# fill is set outside aes(): the hex codes are passed as a plain vector
# instead of being mapped through a fill scale, so no legend is produced.
ggplot(data_ex) +
  geom_mosaic(
    aes(weight = count, x = product(clarity, cut)),
    fill = data_ex$residu_color, na.rm = TRUE
  ) +
  scale_y_productlist() +
  theme_classic() +
  theme(axis.ticks = element_blank(), axis.line = element_blank()) +
  labs(x = "cut", y = "clarity")

enter image description here

But I would like to add this legend (below) on the right of the plot but I don't know how I could do it because the fill field is outside aes so scale_fill_manual does not work...

enter image description here


Solution

  • Using group aesthetic:

    # group = clarity splits each bar into clarity segments; fill = var_fill
    # only decides the colour of those segments.
    p1 <- ggplot(data_ex, aes(x = cut, y = count)) +
      geom_bar(
        aes(group = clarity, fill = var_fill),
        stat = "identity", position = "fill", color = "black"
      ) +
      ggtitle("var fill")

    # Reference plot: clarity used directly as the fill.
    p2 <- ggplot(data_ex, aes(x = cut, y = count)) +
      geom_bar(
        aes(fill = clarity),
        stat = "identity", position = "fill", color = "black"
      ) +
      ggtitle("clarity")

    # Draw the two plots next to each other.
    library(cowplot)
    cowplot::plot_grid(p1, p2)
    

    enter image description here

    EDIT: with ggmosaic

    library(ggmosaic)

    # Mosaic plot of clarity within cut, tiles weighted by count and coloured
    # by var_fill.
    p3 <- ggplot(data_ex) +
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut), fill = var_fill),
        na.rm = TRUE
      ) +
      scale_x_productlist()

    # Same mosaic, coloured by clarity (the stray trailing comma inside aes()
    # has been removed).
    p4 <- ggplot(data_ex) +
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut), fill = clarity),
        na.rm = TRUE
      ) +
      scale_x_productlist()

    cowplot::plot_grid(p3, p4)
    

    enter image description here

    It seems to me that for ggmosaic the group aesthetic is not needed at all; both plots are reversed versions of the geom_bar ones.

    EDIT3:
    defining fill outside the aes fixes the problems such as:
    1) X axis readability
    2) removes the very small colored lines in the borders of each rectangle

    # Add a hex colour per row: "A" and "B" get their own colour, every other
    # var_fill value gets the grey fallback.
    try2 <- data_ex %>%
      mutate(
        color = ifelse(
          var_fill == "A", "#0073C2FF",
          ifelse(var_fill == "B", "#EFC000FF", "#868686FF")
        )
      )

    # fill is given outside aes(), i.e. as a plain vector of hex codes rather
    # than a variable mapped through a fill scale.
    ggplot(try2) +
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut)),
        fill = try2$color, na.rm = TRUE
      ) +
      scale_x_productlist()
    

    enter image description here

    To add y axis labels one needs a bit of wrangling. Here is an approach:

    ggplot(try2) +
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut)),
        fill = try2$color, na.rm = TRUE
      ) +
      scale_x_productlist() +
      scale_y_continuous(
        sec.axis = dup_axis(
          labels = unique(try2$clarity),
          # Break positions: for the rows with cut == "Ideal", take the
          # cumulative proportion of count and place each break halfway
          # between the previous cumulative value (0 for the first row) and
          # the current one.
          breaks = try2 %>%
            filter(cut == "Ideal") %>%
            mutate(
              count2 = cumsum(count / sum(count)),
              post = (count2 + lag(count2, default = 0)) / 2
            ) %>%
            select(post) %>%
            unlist()
        )
      )
    

    enter image description here

    EDIT4: adding the legend can be accomplished in two ways.

    1 - by adding a fake layer to generate the legend - however this produces a problem with the x axis labels (they are a combination of cut and fill) hence I defined the manual breaks and labels

    data_ex from OP edit2

    ggplot(data_ex) +
      # Fully transparent layer whose only job is to map residu_classe to fill
      # so that a legend is generated.
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut), fill = residu_classe),
        alpha = 0, na.rm = TRUE
      ) +
      # Visible layer: colours passed directly, outside aes().
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut)),
        fill = data_ex$residu_color, na.rm = TRUE
      ) +
      scale_y_productlist() +
      theme_classic() +
      theme(axis.ticks = element_blank(), axis.line = element_blank()) +
      labs(x = "cut", y = "clarity") +
      # Colours and class labels are both taken in order of first appearance
      # in data_ex, so the i-th colour lines up with the i-th label.
      scale_fill_manual(
        values = unique(data_ex$residu_color),
        breaks = unique(data_ex$residu_classe)
      ) +
      # The legend keys would inherit alpha = 0 from the fake layer; force
      # them to be opaque.
      guides(fill = guide_legend(override.aes = list(alpha = 1))) +
      scale_x_productlist(
        # Break positions: cumulative share of count per cut, with each break
        # placed halfway between the previous cumulative value (0 for the
        # first cut) and the current one.
        breaks = data_ex %>%
          group_by(cut) %>%
          summarise(sumer = sum(count)) %>%
          mutate(
            sumer = cumsum(sumer / sum(sumer)),
            post = (sumer + lag(sumer, default = 0)) / 2
          ) %>%
          select(post) %>%
          unlist(),
        labels = unique(data_ex$cut)
      )
    

    enter image description here

    2 - by extracting the legend from one plot and adding it to the other

    library(gtable)              
    library(gridExtra) 
    

    make fake plot for legend:

    # Throw-away plot that maps residu_classe to fill, built only so that its
    # legend can be reused; colours and labels are paired by order of first
    # appearance in data_ex.
    gg_pl <- ggplot(data_ex) +
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut), fill = residu_classe),
        alpha = 1, na.rm = TRUE
      ) +
      scale_fill_manual(
        values = unique(data_ex$residu_color),
        breaks = unique(data_ex$residu_classe)
      )
    

    make the correct plot

    # The plot that is actually shown: colours passed directly outside aes(),
    # so it carries no legend of its own.
    z <- ggplot(data_ex) +
      geom_mosaic(
        aes(weight = count, x = product(clarity, cut)),
        fill = data_ex$residu_color, na.rm = TRUE
      ) +
      scale_y_productlist() +
      theme_classic() +
      theme(axis.ticks = element_blank(), axis.line = element_blank()) +
      labs(x = "cut", y = "clarity")


    # Convert the fake plot to a grob table, keep only its "guide-box"
    # element, and draw it to the right of z in a 4:1 width ratio.
    a.gplot <- ggplotGrob(gg_pl)
    tab <- gtable::gtable_filter(a.gplot, "guide-box", fixed = TRUE)
    gridExtra::grid.arrange(z, tab, nrow = 1, widths = c(4, 1))
    

    enter image description here