Search code examples
ggplot2 label bar-chart

How can I align labels in a group of bar charts?


I would like to align the count and percentage labels over the columns in a group of bar charts. In some of the charts in the group, not all values are represented. There are three sentiment values -- positive, neutral, and negative -- but some offices only have one or two of the responses. For the individual charts that are missing a value, the labels don't align with their columns. How can I get the columns and labels into alignment?

Here is the data and my code -- really appreciate the support!

data

office  sentiment
1   positive
1   positive
1   neutral
1   neutral
1   positive
1   positive
1   positive
1   positive
1   neutral
1   neutral
1   neutral
1   positive
1   positive
1   negative
1   negative
1   positive
1   neutral
1   neutral
1   neutral
1   positive
1   neutral
1   neutral
1   negative
1   positive
1   positive
1   neutral
2   positive
2   positive
2   neutral
3   positive
3   positive
3   positive
3   positive
4   positive
4   negative
4   neutral
5   positive
6   positive
6   positive
6   positive
6   positive
6   neutral
6   positive
6   positive
6   positive
6   positive
7   positive
8   neutral
8   positive
8   positive


df <- [office, sentiment] # a data frame or tibble of the above data set

# Count responses for every sentiment/office pair. Only the last grouping
# variable (office) is dropped by summarize(), so the result stays grouped
# by sentiment.
office_sentiment <- df[, c("office", "sentiment")] %>%
  group_by(sentiment, office) %>%
  summarize(count = n()) %>%
  filter(count >= 1, sentiment != "NA")
office_sentiment

# Same counts plus a formatted percentage label. The table is still grouped
# by sentiment here, so sum(count) is the total within each sentiment.
office_sentiment_percentage <- df[, c("office", "sentiment")] %>%
  group_by(sentiment, office) %>%
  summarize(count = n()) %>%
  filter(count >= 1, sentiment != "NA") %>%
  mutate(percentage = paste0(round(count / sum(count) * 100, 1), "%"))

# the function i use for most bar charts
# Build a dodged bar chart of pre-computed counts, faceted by `var1`.
#
# data           - summary data frame with one row per bar
# var1           - name (string) of the column mapped to x and used to facet
# var2           - name (string) of the column mapped to fill
# count          - name (string) of the column holding the bar heights
# title          - plot title
# xLabel, yLabel - axis titles
#
# Returns the ggplot object, so callers can keep adding layers with `+`.
# Assumes `myBar_theme` is a ggplot2 theme object defined elsewhere.
myBarChart_2 <- function(data, var1, var2, count, title, xLabel, yLabel) {
  # Keep the original side effect of making myBar_theme the session default.
  # theme_set() returns the *previous* theme, so it must not be chained into
  # the plot with `+`; the theme itself is added below instead.
  theme_set(myBar_theme)

  # NOTE(review): aes_string() is deprecated in recent ggplot2 releases; it is
  # kept here so the default legend title stays the same.
  ggplot(data, aes_string(x = var1, y = count, fill = var2)) +
    ggtitle(title) +
    # stat = "identity": bar heights come straight from the `count` column.
    geom_bar(
      stat = "identity",
      width = 1,
      position = position_dodge2(padding = 0.1, reverse = FALSE, preserve = "single")
    ) +
    scale_color_manual(
      values = c("#66CCFF", "#009999", "#FF66CC"),
      aesthetics = c("colour", "fill")
    ) +
    scale_y_continuous(sec.axis = waiver(), expand = expansion(mult = c(0, 0.05))) +
    facet_wrap(var1, strip.position = "bottom", scales = "free_x") +
    xlab(xLabel) +
    ylab(yLabel) +
    myBar_theme +
    theme(axis.text.x = element_blank())
}

# code for this sample set of charts

# Draw the faceted chart and add the two label layers.
# The summary table's columns are sentiment, office, count and percentage,
# so the x/facet column passed to the helper is "office".
myBarChart_2(
  office_sentiment_percentage, "office", "sentiment", "count",
  "Sentiment by Office", "sentiment", "total"
) +
  # Fix the order in which the sentiments are dodged within each facet.
  aes(group = factor(sentiment, levels = c("positive", "neutral", "negative"))) +
  # Count label sits just below the top of the bar, percentage just above it.
  geom_text(aes(label = count), vjust = 1.5, position = position_dodge2(width = 1), size = 2.5) +
  geom_text(aes(label = percentage), vjust = -.3, position = position_dodge2(width = 1), size = 2.5) +
  guides(fill = guide_legend(reverse = TRUE))

enter image description here


Solution

  • There are a number of issues with your minimal reproducible example. I strongly recommend using the reprex package and following "How to make a minimal reproducible example" and "How to make a great R reproducible example" for future posts.

    The answer to your question is, I believe, very straightforward. If you add preserve = "single" to each geom_text() the labels appear to be positioned correctly. I wasn't able to run your example code, so I have stripped out some of the problematic sections to illustrate my answer:

    library(tidyverse)
    
    df <- structure(list(office = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                    1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 6L, 
                                    6L, 6L, 6L, 6L, 6L, 7L, 8L, 8L, 8L),
                         sentiment = c("positive", 
                                       "positive", "neutral", "neutral", "positive", "positive", "positive", 
                                       "positive", "neutral", "neutral", "neutral", "positive", "positive", 
                                       "negative", "negative", "positive", "neutral", "neutral", "neutral", 
                                       "positive", "neutral", "neutral", "negative", "positive", "positive", 
                                       "neutral", "positive", "positive", "neutral", "positive", "positive", 
                                       "positive", "positive", "positive", "negative", "neutral", "positive", 
                                       "positive", "positive", "positive", "positive", "neutral", "positive", 
                                       "positive", "positive", "positive", "positive", "neutral", "positive", 
                                       "positive")), class = "data.frame", row.names = c(NA, -50L))
    
    # Count responses per sentiment/office pair in a single pipeline.
    office_sentiment <- df[, c("office", "sentiment")] %>%
      group_by(sentiment, office) %>%
      summarize(Count = n()) %>%
      filter(Count >= 1, sentiment != "NA")
    #> `summarise()` has grouped output by 'sentiment'. You can override using the `.groups` argument.
    office_sentiment
    #> # A tibble: 15 × 3
    #> # Groups:   sentiment [3]
    #>    sentiment office Count
    #>    <chr>      <int> <int>
    #>  1 negative       1     3
    #>  2 negative       4     1
    #>  3 neutral        1    11
    #>  4 neutral        2     1
    #>  5 neutral        4     1
    #>  6 neutral        6     1
    #>  7 neutral        8     1
    #>  8 positive       1    12
    #>  9 positive       2     2
    #> 10 positive       3     4
    #> 11 positive       4     1
    #> 12 positive       5     1
    #> 13 positive       6     8
    #> 14 positive       7     1
    #> 15 positive       8     2
    
    # Same counts, plus each row's share of its sentiment group as a text label.
    office_sentiment_percentage <- df[, c("office", "sentiment")] %>%
      group_by(sentiment, office) %>%
      summarize(Count = n()) %>%
      filter(Count >= 1, sentiment != "NA") %>%
      mutate(percentage = paste0(round(Count / sum(Count) * 100, 1), "%"))
    #> `summarise()` has grouped output by 'sentiment'. You can override using the `.groups` argument.
    
    
    # The text layers use preserve = "single" just like the bars, so labels
    # stay over their columns in facets where a sentiment is missing.
    bar_dodge <- position_dodge2(padding = 0.1, reverse = FALSE, preserve = "single")
    label_dodge <- position_dodge2(width = 1, preserve = "single")
    sentiment_palette <- c("#66CCFF", "#009999", "#FF66CC")

    ggplot(office_sentiment_percentage, aes(x = office, y = Count, fill = sentiment)) +
      geom_col(width = 1, position = bar_dodge) +
      # Count label drawn first, percentage label second.
      geom_text(aes(label = Count), vjust = 1.5, position = label_dodge, size = 2.5) +
      geom_text(aes(label = percentage), vjust = -.3, position = label_dodge, size = 2.5) +
      scale_color_manual(values = sentiment_palette, aesthetics = c("colour", "fill")) +
      scale_y_continuous(sec.axis = waiver(), expand = expansion(mult = c(0, 0.05))) +
      facet_wrap(~office, strip.position = "bottom", scales = "free_x") +
      labs(title = "Sentiment by Office", x = "sentiment", y = "total count") +
      theme(axis.text.x = element_blank()) +
      guides(fill = guide_legend(reverse = TRUE))
    

    Created on 2022-01-14 by the reprex package (v2.0.1)

    Does this solve your problem?