Search code examples
r ggplot2 geom-bar

A single bar is misplaced in a barplot (ggplot2)


I'm making a 2-panel barplot, but a single bar appears out of place: the small bar in the upper part of the right panel. It is filled as TR, but it is drawn in the position that belongs to TL.

The data is:

DATA2=structure(list(A = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("For", "Par"), class = "factor"), 
B = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("TL", "TR"), class = c("ordered", 
"factor")), C = structure(c(1L, 4L, 4L, 1L, 1L, 1L, 4L, 1L, 
1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 4L, 1L, 5L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 
5L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 3L, 4L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 5L, 4L, 1L, 4L, 5L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 
3L, 4L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 
3L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 3L, 1L, 1L, 1L, 
5L, 4L, 1L, 4L, 5L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 4L, 1L, 1L, 
1L, 1L, 2L, 4L, 5L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 
1L, 4L, 1L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
4L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 5L, 3L, 1L, 1L, 
4L, 1L, 1L, 1L, 5L, 1L, 4L), .Label = c("0-2", "2-4", "4-6", 
"6-8", "8-10"), class = c("ordered", "factor"))), row.names = c(NA, 
-240L), class = "data.frame")

The plot code is as follows:

# Dodged bar chart of row counts per C class, filled by B, with one panel per
# level of A. This is the plot that shows the misplaced bar.
ggplot(data = DATA2, mapping = aes(x = C, fill = B)) +
  geom_bar(
    position = position_dodge(width = -0.9, preserve = "single")
  ) +
  scale_fill_manual(values = c("grey", "grey40")) +
  facet_grid(. ~ A) +
  coord_flip() +
  theme_bw()

enter image description here

The small bar (count = 1) in the upper part of the right panel is misplaced, although the fill is correct. Any tips? Thanks!


Solution

  • There are combinations of variable values that don't show up in your data, namely c("Par","TL","8-10") and c("Par", "TR", "2-4"). When you use geom_bar(stat = "count") (which is the default), ggplot2 apparently doesn't generate these combinations where the count would be 0. For each pairing, bars are rendered from the top downward. When one of these counts doesn't exist, the other is rendered at the top of the bar pairing.

    We can see the same thing in this simpler example:

    library(dplyr)
    library(tidyr)
    library(ggplot2)
    
    # Toy data: Group 1 has one possum and two dogs, Group 2 has three possums,
    # so the combination ("Group 2", "dog") never occurs.
    df <- data.frame(
      A = rep(c("Group 1", "Group 2"), each = 3),
      B = c("possum", "dog", "dog", "possum", "possum", "possum")
    )
    
    df
    #>         A      B
    #> 1 Group 1 possum
    #> 2 Group 1    dog
    #> 3 Group 1    dog
    #> 4 Group 2 possum
    #> 5 Group 2 possum
    #> 6 Group 2 possum
    
    # Same dodged count plot as in the question, reduced to the toy data.
    ggplot(data = df, mapping = aes(x = A, fill = B)) +
      geom_bar(
        position = position_dodge(width = -0.9, preserve = "single")
      )
    

    The way around this is to do the counting manually:

    # Count the rows for every (A, B) pair that occurs in df, one step at a
    # time, then remove the grouping that tally() leaves behind.
    df_tally <- group_by(df, A, B)
    df_tally <- tally(df_tally)
    df_tally <- ungroup(df_tally)
    
    df_tally
    #>   A       B          n
    #> 1 Group 1 dog        2
    #> 2 Group 1 possum     1
    #> 3 Group 2 possum     3
    

    We then need to add the missing combination:

    # Add a row for every A x B combination that is absent from the counts;
    # its n is filled with NA.
    df_tally <- df_tally %>%
      complete(A, B)
    
    df_tally
    #>   A       B          n
    #> 1 Group 1 dog        2
    #> 2 Group 1 possum     1
    #> 3 Group 2 dog       NA
    #> 4 Group 2 possum     3
    

    Now, since we've done the counting already, we use stat = "identity":

    # n already holds the bar heights, so stat = "identity" draws them as
    # given instead of counting rows again.
    ggplot(data = df_tally, mapping = aes(x = A, y = n, fill = B)) +
      geom_bar(
        stat = "identity",
        position = position_dodge(width = -0.9, preserve = "single")
      )
    #> Warning: Removed 1 rows containing missing values (geom_bar).
    

    In your case, this looks like:

    # Count rows per (A, B, C), drop the grouping, then add the combinations
    # that never occur in DATA2 (their n is filled with NA).
    DATA3 <- group_by(DATA2, A, B, C)
    DATA3 <- tally(DATA3)
    DATA3 <- ungroup(DATA3)
    DATA3 <- complete(DATA3, A, B, C)
    
    
    # Same layout as the original plot, but drawn from the pre-computed counts
    # so that every C class has a slot for both B levels.
    ggplot(data = DATA3, mapping = aes(x = C, y = n, fill = B)) +
      geom_bar(
        stat = "identity",
        position = position_dodge(width = -0.9, preserve = "single")
      ) +
      scale_fill_manual(values = c("grey", "grey40")) +
      facet_grid(. ~ A) +
      coord_flip() +
      theme_bw()
    #> Warning: Removed 2 rows containing missing values (geom_bar).
    

    Update: dplyr 0.8.0

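    As of dplyr 0.8.0, group_by has a .drop parameter. Setting .drop = FALSE keeps the groups for factor levels that have no data, so the missing combinations are counted as 0 and the separate complete() step is no longer needed. This slightly simplifies the necessary code. Example: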

    # .drop = FALSE keeps the factor combinations that have no rows, so they
    # are summarised with n = 0 and no complete() step is required.
    # summarise() removes only the last grouping variable, so ungroup() is
    # added to return a plain, ungrouped tibble (as the tally() version does).
    DATA3 <- DATA2 %>%
      group_by(A, B, C, .drop = FALSE) %>%
      summarise(n = n()) %>%
      ungroup()