Search code examples
rggplot2legendaxisaesthetics

How to prevent R from alphabetically ranking data in ggplot and specify the order in which data is plotted (Data + Code + Graphs provided)?


I'm trying to fix an issue with my GGBalloonPlot graph with regards to how R processes the axis labels.

By default R plots the data using the labels ranked in reverse alphabetical order but to reveal the pattern of the data, the data need to be plotted in a specific order. The only way I've been able to do trick the software is by manually adding a prefix to each label in my .csv table so that R would rank them properly in my output. This is time consuming since I need to manually order the data first before adding the prefix and then plotting.

I would like to input a character vector (or something like that) which would essentially specify the order in which I want to have the data plotted which would reveal the pattern without the need for a prefix in the label name.

I have made some attempts with "scale_y_discrete" without success. I would also like to do the same thing for the X axis since I've had to use the same "trick" to display the columns in the proper non-alphabetical order which offsets the position of the labels. Any idea on how to get GGplot to display my values as seen in the graph without having to "trick" the software since this is quite time consuming ?

Data + Code

#Assign data to "Stack_Overflow_DummyData"

# Dummy data: 8 species recorded in each of 4 regions (32 rows, 6 columns).
# Factor levels are listed explicitly in alphabetical order; the *_prefix
# columns carry the hand-made prefixes used to force the plotting order.
Stack_Overflow_DummyData <- data.frame(
  Species = factor(
    rep(c("Zan", "Can", "Ani", "Fis", "Ort", "Cal", "Sem", "Cau"), times = 4L),
    levels = c("Ani", "Cal", "Can", "Cau", "Fis", "Ort", "Sem", "Zan")
  ),
  Species_prefix = factor(
    rep(
      c("bf.Zan", "be.Can", "at.Ani", "as.Fis",
        "ag.Ort", "af.Cal", "ad.Sem", "ac.Cau"),
      times = 4L
    ),
    levels = c("ac.Cau", "ad.Sem", "af.Cal", "ag.Ort",
               "as.Fis", "at.Ani", "be.Can", "bf.Zan")
  ),
  Dist = factor(
    rep(c("Ind", "Pan", "Wid", "Ind", "End", "End", "Per", "Wid"), times = 4L),
    levels = c("End", "Ind", "Pan", "Per", "Wid")
  ),
  # Each region covers one contiguous run of 8 rows.
  Region = factor(
    rep(c("Far", "Nor", "Cen", "Col"), each = 8L),
    levels = c("Cen", "Col", "Far", "Nor")
  ),
  Region_prefix = factor(
    rep(c("a.Far", "b.Nor", "c.Cen", "d.Col"), each = 8L),
    levels = c("a.Far", "b.Nor", "c.Cen", "d.Col")
  ),
  Frequency = c(
    75, 50, 25, 50, 0, 0, 0, 0,
    11.1, 22.2, 55.6, 55.6, 11.1, 0, 5.6, 0,
    0, 2.7, 36.9, 27.9, 65.8, 54.1, 37.8, 28.8,
    0, 0, 0, 3.1, 34.4, 21.9, 78.1, 81.3
  )
)



# Plot Data With Prefix Trick

library(ggplot2)
library(ggpubr)

# Balloon plot using the prefixed label columns: fill colour encodes Dist,
# balloon size encodes Frequency.
ggballoonplot(Stack_Overflow_DummyData, x = "Region_prefix", y = "Species_prefix",
              size = "Frequency", size.range = c(1, 9), fill = "Dist") +
  # Apply the grey theme to this plot only. theme_set() must not be added with
  # `+`: it changes the session-wide default theme and returns the *previous*
  # theme, so the plot would get the old theme and differ between runs.
  theme_gray() +
  # Remove the grey background from the legend keys
  theme(legend.key = element_blank()) +
  # Remove both axis titles
  theme(axis.title = element_blank()) +
  # Print the Frequency value next to each balloon
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)

# Plot Data Without Prefix Trick

library(ggplot2)
library(ggpubr)

# Balloon plot using the raw label columns (default alphabetical level order):
# fill colour encodes Dist, balloon size encodes Frequency.
ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
              size = "Frequency", size.range = c(1, 9), fill = "Dist") +
  # Apply the grey theme to this plot only. theme_set() must not be added with
  # `+`: it changes the session-wide default theme and returns the *previous*
  # theme, so the plot would get the old theme and differ between runs.
  theme_gray() +
  # Remove the grey background from the legend keys
  theme(legend.key = element_blank()) +
  # Remove both axis titles
  theme(axis.title = element_blank()) +
  # Print the Frequency value next to each balloon
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)

Here below are the graphs

Good Graph.

Using the label prefix trick with the visible pattern in the data:

enter image description here

Wrong Graph (R default).

Without the prefix trick when GGplot automatically orders the data/labels and the graph makes no sense:

enter image description here

To sum up, I would like the Good graph output without having to have to previously add a prefix in my labels.

Many Thanks in advance for your help.


Solution

  • For the axis labels I would define a previous function to override the breaks:

    # Strip a leading ordering prefix -- one or more lowercase letters followed
    # by a dot, e.g. "a." or "ab." -- from each break label.
    shlab <- function(lbl_brk) {
      prefix_pattern <- "^[a-z]+\\."
      sub(pattern = prefix_pattern, replacement = "", x = lbl_brk)
    }
    

    Then, to change the labels you just have to use scale_x,y_discrete with labels = shlab (if you look at the help of scale_x_discrete you will see that one of the options for labels is A function that takes the breaks as input and returns labels as output).

    For the colours would be enough to change them (values) in scale_fill_manual and for the sizes, using guides so:

    library(ggplot2)
    library(ggpubr)

    # Strip a leading ordering prefix (lowercase letters followed by a dot,
    # e.g. "a." or "ab.") from each break label.
    shlab <- function(lbl_brk) {
      sub("^[a-z]+\\.", "", lbl_brk)
    }

    # Plot the prefixed columns (so the prefixes fix the order) but show the
    # labels without their prefixes.
    ggballoonplot(Stack_Overflow_DummyData,
                  x = "Region_prefix", y = "Species_prefix",
                  size = "Frequency", size.range = c(1, 9), fill = "Dist") +
      scale_x_discrete(labels = shlab) +
      scale_y_discrete(labels = shlab) +
      # One fill colour per Dist level
      scale_fill_manual(values = c("green", "blue", "red", "black", "white")) +
      # Draw the fill legend keys at a fixed size
      guides(fill = guide_legend(override.aes = list(size = 8))) +
      # Apply the grey theme to this plot only. theme_set() must not be added
      # with `+`: it changes the session-wide default theme and returns the
      # *previous* theme, so the plot would differ between runs.
      theme_gray() +
      # Remove the grey background from the legend keys
      theme(legend.key = element_blank()) +
      # Remove both axis titles
      theme(axis.title = element_blank()) +
      # Print the Frequency value next to each balloon
      geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)
    
    

    enter image description here

    UPDATE:

    With the new dataset and vector labels:

    library(ggplot2)
    library(ggpubr)

    # Balloon plot on the raw label columns: fill colour encodes Dist, balloon
    # size encodes Frequency. The axis order is given explicitly via `limits`,
    # so no label prefixes are needed.
    ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
                  size = "Frequency", size.range = c(1, 9), fill = "Dist") +
      scale_y_discrete(
        limits = c("Cau", "Sem", "Cal", "Ort", "Fis", "Ani", "Can", "Zan")
      ) +
      scale_x_discrete(limits = c("Far", "Nor", "Cen", "Col")) +
      # Apply the grey theme to this plot only. theme_set() must not be added
      # with `+`: it changes the session-wide default theme and returns the
      # *previous* theme, so the plot would differ between runs.
      theme_gray() +
      # Remove the grey background from the legend keys
      theme(legend.key = element_blank()) +
      # Remove both axis titles
      theme(axis.title = element_blank()) +
      # Print the Frequency value next to each balloon
      geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)
    

    enter image description here