Search code examples
rggplot2facetaxis-labels

Color axis text based on variable value in a faceted plot


I have two faceted plots and want the axis text to be grey when a given variable takes the value of NA or 0. This currently works for a single plot but when I facet them, the text color doesn't match up with the bar values. See the reprex below:

# create df
text <-   
"   country          domain var_name    perc           color
        A             'domain c           Val2      NA            grey
        A             'domain c'           Val1      NA            grey
        A             'domain c'           Val3 0.01670          orange
        A             'domain c'           Val8 0.00000            grey
        A             'domain c'           Val9      NA            grey
        A             'domain c'          Val11 0.02510          orange
        A             'domain c'          Val19 0.01890          orange
        A           'domain d'          Val16 0.04840          purple
        A            'domain a'           Val5 0.00776 darkolivegreen4
       A            'domain a'           Val6 0.02390 darkolivegreen4
       A            'domain a'           Val7 0.00247 darkolivegreen4
       A            'domain a'          Val10 0.03840 darkolivegreen4
       A            'domain a'          Val13 0.02490 darkolivegreen4
       A            'domain a'          Val18      NA            grey
       A            'domain b'           Val4 0.01630            navy
       A             'domain b'          Val14 0.01610            navy
       A             'domain b'          Val12 0.05180            navy
       A             'domain b'          Val17 0.01770            navy
       A             'domain b'          Val15 0.03550            navy
       B             'domain c'           Val2 0.01440          orange
       B             'domain c'           Val1      NA            grey
       B             'domain c'           Val3 0.02590          orange
       B             'domain c'           Val8 0.00000            grey
       B             'domain c'           Val9     NaN            grey
       B             'domain c'          Val11 0.02900          orange
       B             'domain c'          Val19 0.00000            grey
       B 'domain d'          Val16 0.00261          purple
       B            'domain a'           Val5 0.10900 darkolivegreen4
       B            'domain a'           Val6 0.00702 darkolivegreen4
       B            'domain a'           Val7 0.01330 darkolivegreen4
       B            'domain a'          Val10 0.00861 darkolivegreen4
       B            'domain a'          Val13 0.06050 darkolivegreen4
       B            'domain a'          Val18 0.07770 darkolivegreen4
       B            'domain b'           Val4 0.00797            navy
       B             'domain b'          Val14 0.05230            navy
       B             'domain b'          Val12 0.04290            navy
       B             'domain b'          Val17 0.03190            navy
       B             'domain b'          Val15 0.06940            navy" 

tbl <- read.table(text = text, header = T, fill = T) 

# overwrite coord_polar function
cp <- coord_polar(theta = "x")
cp$is_free <- function() TRUE

# plot
p <-
  ggplot(tbl, aes(x = forcats::as_factor(var_name), y = perc)) +
  cp +
  geom_bar(stat = "identity", aes(fill = color)) +
  scale_y_continuous(labels = scales::label_percent()) + 
  scale_fill_identity(name = "Domain") +
  facet_grid(. ~ country, scales = "fixed") +
  theme_bw() +
  theme(aspect.ratio = 1,
        strip.text = element_text(size = 16),
        axis.title = element_text(size = 18),
        title = element_text(size = 20),
        axis.text.x = element_text(colour = tbl$color, face = "bold"),
        legend.text = element_text(size = 14))

p

Which yields this image:

this image

Note how for country B, Var18 is grey when there is clearly a non-zero quantity of that var. This is because country A has 0 of that value.

I would like to avoid using Grid::, but any advice on how to overcome this issue would be very much appreciated!


Solution

  • So I have found a way to fix the axis colors and scale plots using grid. Based on the above reprex:

    # Generate a function to get the legend of one of the ggplots
    get_legend<-function(myggplot){
        tmp <- ggplot_gtable(ggplot_build(myggplot))
        leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
        legend <- tmp$grobs[[leg]]
        return(legend)
      }
    
    # From the full dataset, find the value of the country with the highest percent of any var_name
    
    max <- round(max(tbl$perc), digits = 2)
    
    # create a sequence of length 6 from 0 to the largest perc value
    max_seq <- seq(0, max, length = 6)
    
    # initiate empty list 
    my_list <- list()
    
    # list of countries to loop through
    my_sub <- c("A", "B")
    

    Now we loop through each country, saving each country plot to the empty list.

    for(i in my_sub){
    
      ### Wrangle
      tbl_sub <-
          tbl %>%
          dplyr::mutate(country = as.factor(country),
                        domain = as.factor(domain)) %>%       
          dplyr::filter(country == i),
          dplyr::mutate(perc = ifelse(is.na(perc), 0, perc))
    
      # Create custom coord_polar arguments 
      cp <- coord_polar(theta = "x", clip = "off")
      cp$is_free <- function() TRUE
    
      p <-
        ggplot(dplyr::filter(tbl_sub, country == i), 
               aes(x = forcats::as_factor(var_name), 
                   y = perc)) +
               cp +
               geom_bar(stat = "identity", aes(fill = color)) +
               facet_grid(. ~ country, scales = "fixed") +
               scale_y_continuous(breaks = c(max_seq), 
                                  labels = scales::label_percent(), 
                                  limits = c(0, max(max_seq))) +
               scale_fill_identity(guide = "legend", 
                                   name = "Domain", 
                                   labels = c(darkolivegreen4 = "domain a", 
                                              orange = "domain c", 
                                              navy = "domain b" , 
                                              purple = "domain d", 
                                              grey = "not applicable")) +
               labs(x = "",
                    y = "") +
               theme_bw() +
               theme(aspect.ratio = 1,
                     panel.border = element_blank(),
                     strip.text = element_text(size = 16),
                     axis.title = element_text(size = 18),
                     title = element_text(size = 20),
                     axis.text.x = element_text(colour = tbl_new$color, face = "bold"),
                     legend.text = element_text(size = 14))
    
      my_list[[i]] <- p
    
      }
    

    Now we have the plots in a list, we want to play around with the legend and use grid:: and gridExtra to plot everything together.

    # pull legend from first ggplot in the list 
    legend <- get_legend(my_list[[1]])
    
    # remove legends from all the plots in the list
    for(i in 1:length(my_list)){
      my_list[[i]] <- my_list[[i]] + theme(legend.position = "none")
    }
    
    # plot everything together
    p <- grid.arrange(arrangeGrob(
      grobs = my_list,
      nrow = round(length(my_sub)/2, 0),
      left = textGrob("Y axis",
                        gp = gpar(fontsize = 20),
                        rot = 90),
      bottom = textGrob("X axis",
                          gp = gpar(fontsize = 20),
                          vjust = -3),
      top = textGrob("Big plot",
                       gp = gpar(fontsize = 28, vjust = 2))),
      legend = legend,
      widths = c(9,1,1),
      clip = F)
    

    This yields this image: enter image description here

    The plots are scaled to the country with the largest perc value (0 - 11%), and each country has unique greyed-out values depending on whether there is a 0 or NA in the perc column.

    I'm sure there are more simplistic solutions, but this is serving me for now!