Search code examples
Tags: r, ggplot2, subset, factors

Why does ggplot2 ignore factor level order when plotting in this script?


As I understand it, the order of the items in the legend is best controlled by setting the order of the levels of the relevant factor. However, when I set the order of the factor levels, the resulting plot seems to ignore them (see code below). From other questions, it seems that subsetting the data frame might be a cause of the issue. I'm making diagrams of the positions of features on schematics of protein sequences, starting from a big table containing lots of different types of features. This means that I can't avoid subsetting the data to allow me to plot different features in different ways.

So my questions are:

1) How can I control the order of the items in the legend in this case?
2) Ideally I would like a separate legend for each geom_point layer - so I have one entitled 'Motifs' and the other 'PTM'. Is this possible?

library(tidyverse)

# Example feature table: one row per feature to draw.
#   type        - feature class ("Chain", "PTM" or "Motif")
#   description - feature label; mapped to point shape and colour below
#   begin       - x position of the feature (xmin for chains)
#   order       - row of the schematic the feature is drawn on (y position)
#   length      - used as xmax when drawing chains
#
# Built with data.frame(): as.data.frame() expects an existing object as its
# first argument and stops with 'argument "x" is missing' when it is given
# only named column vectors.
df <- data.frame(
  type = as.factor(c("Chain", "PTM", "PTM", "Motif", "Motif", "PTM",
                     "Motif", "Chain", "PTM", "PTM", "Motif", "Motif")),
  description = as.factor(c("seq", "methyl", "methyl", "RXL", "RXL", "amine",
                            "CXXC", "seq", "amine", "methyl", "CXXC", "RXL")),
  begin = c(1, 20, 75, 150, 67, 289, 100, 1, 124, 89, 73, 6),
  order = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
  length = c(300, 1, 1, 1, 1, 1, 1, 350, 1, 1, 1, 1)
)

# Horizontal extent of the plotting area.
plot_start <- -100
plot_end <- 500

# Empty canvas with fixed axis limits; layers are added step by step below.
dfplot <- ggplot() +
  xlim(plot_start, plot_end) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5))

# White background without grid lines, axis ticks, y labels or panel border.
dfplot <- dfplot + theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  theme(axis.ticks = element_blank(),
        axis.text.y = element_blank()) +
  theme(panel.border = element_blank())

# Plot chains as rectangles centred on their row.
dfplot <- dfplot + geom_rect(data = df[df$type == "Chain", ],
                             mapping = aes(xmin = begin,
                                           xmax = length,
                                           ymin = order - 0.2,
                                           ymax = order + 0.2),
                             colour = "blue",
                             fill = "#C4D9E9")

# Set the desired order of the factor levels.
# NOTE: this is the step whose effect on the legend is in question - the two
# point layers below each see a different subset of these levels.
df$description <- factor(df$description,
                         levels = c("amine", "methyl", "RXL", "seq", "CXXC"))

# Plot motif positions on the row itself.
dfplot <- dfplot + geom_point(data = filter(df, type == "Motif"),
                              aes(begin, order,
                                  shape = description, color = description),
                              size = 3)

# Plot modification (PTM) positions, raised 0.25 above the row.
dfplot <- dfplot + geom_point(data = filter(df, type == "PTM"),
                              aes(begin, (order + 0.25),
                                  shape = description, color = description),
                              size = 3)

dfplot

The plot generated


Solution

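  • For reasons that I don't fully understand, the factor order is ignored when you use geom_point twice. (A likely explanation, worth confirming against your ggplot2/scales version: each layer trains the shared discrete scale only on the levels present in its own subset of the data, and when a later layer contributes levels the earlier one did not, the combined set is re-sorted instead of keeping the factor's level order.) Modifying the data so that you only need to call geom_point once fixed the problem. An alternative that keeps the two layers is to state the order explicitly on the scales, e.g. by passing the desired levels as `limits` to `scale_color_discrete()` and `scale_shape_discrete()`.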

    library(tidyverse)

    # Feature table: one row per feature drawn on the schematic.
    feature_type <- c("Chain", "PTM", "PTM", "Motif", "Motif", "PTM",
                      "Motif", "Chain", "PTM", "PTM", "Motif", "Motif")
    feature_label <- c("seq", "methyl", "methyl", "RXL", "RXL", "amine",
                       "CXXC", "seq", "amine", "methyl", "CXXC", "RXL")

    df <- data.frame(
      type = factor(feature_type),
      description = factor(feature_label),
      begin = c(1, 20, 75, 150, 67, 289, 100, 1, 124, 89, 73, 6),
      order = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
      length = c(300, 1, 1, 1, 1, 1, 1, 350, 1, 1, 1, 1)
    )

    # Legend order wanted for the feature labels.
    level_order <- c("amine", "methyl", "RXL", "seq", "CXXC")

    # Raise PTM rows by 0.25 so that a single point layer can draw both PTMs
    # and motifs, and apply the desired level order to `description`.
    df <- df %>%
      mutate(
        order = order + if_else(type == "PTM", 0.25, 0),
        description = factor(description, levels = level_order)
      )

    # Horizontal extent of the plotting area.
    plot_start <- -100
    plot_end <- 500

    # Rows used by each layer.
    chain_rows <- filter(df, type == "Chain")
    marker_rows <- filter(df, type %in% c("PTM", "Motif"))

    dfplot <- ggplot() +
      xlim(plot_start, plot_end) +
      scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5)) +
      # white background, no grid, ticks, y labels or panel border
      theme_bw() +
      theme(
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.border = element_blank()
      ) +
      # chains: rectangles centred on their row
      geom_rect(
        data = chain_rows,
        mapping = aes(
          xmin = begin,
          xmax = length,
          ymin = order - 0.2,
          ymax = order + 0.2
        ),
        colour = "blue",
        fill = "#C4D9E9"
      ) +
      # motif and PTM positions, drawn by one layer so they share one legend
      geom_point(
        data = marker_rows,
        mapping = aes(begin, order, shape = description, color = description),
        size = 3
      )

    dfplot