Search code examples
r sorting ggplot2 legend stacked-bar-chart

How to change the order in which the categories are shown in a stacked bar chart and get rid of the "a" in the legend in R?


I've been trying for hours to get rid of the "a" in the legend. I saw solutions here for that, but they all removed the legend altogether, and I want to keep it. Apart from that, I want the values to start with mandatory school at the bottom of each column and have the highest educational attainment at the top. How can I do this? Please do not close my question. I find it disrespectful since I need support in doing this. Thank you!

# Example data for the plot: 72 rows = 4 age ranges x 2 Group.2 codes x
# 9 Group.3 codes. The Group.* / x column names follow stats::aggregate()'s
# default output naming.
#   Group.1  factor of age-range interval labels
#   Group.2  integer code 1 or 2 (the plot footnote describes the two columns
#            as males and females; presumably 1 = male, 2 = female -- confirm)
#   Group.3  integer code 1..9 (presumably the education level, in the order
#            of the legend labels used by the plotting code -- confirm)
#   x        value plotted on the y axis ("Mean Earnings")
#   share    value printed on the bars as a percentage label
earnings_data <- tibble::tibble(
  # The four age ranges cycle fastest: 4 labels repeated 18 times.
  Group.1 = factor(rep(c("(25,34]", "(34,44]", "(44,54]", "(54,65]"), 18)),
  # 1,1,1,1,2,2,2,2,... : each code covers one full cycle of age ranges.
  Group.2 = rep(rep(1:2, 9), each = 4L),
  # Each code 1..9 covers 8 consecutive rows (4 age ranges x 2 Group.2 codes).
  Group.3 = rep(1:9, each = 8L),
  x = c(
    5.06818181818182, 5.80811808118081, 5.90760869565217, 6.00348432055749,
    4.35483870967742, 4.66666666666667, 4.72625698324022, 5.08411214953271,
    4.70833333333333, 5.38095238095238, 5.86842105263158, 6.46428571428571, 4,
    4.92857142857143, 5.31578947368421, 5.28571428571429, 5.3, 6.3,
    5.95652173913043, 6.66666666666667, 4.66666666666667, 4.69230769230769,
    5.54545454545455, 5.73333333333333, 5, 7.26666666666667, 7.13636363636364,
    7.54545454545455, 5, 5.9, 6.82608695652174, 5.83333333333333,
    6.08465608465608, 6.83566878980892, 7.28323197219809, 7.28296438883542,
    5.68085106382979, 6.04819277108434, 6.26519337016575, 6.38515901060071,
    5.74358974358974, 6.57692307692308, 7.23478260869565, 7.52631578947368,
    6.04347826086957, 6.43181818181818, 6.74324324324324, 6.20338983050847,
    6.1031746031746, 7.01630434782609, 7.37894736842105, 7.72950819672131,
    5.70114942528736, 6.46938775510204, 6.73913043478261, 7.19230769230769,
    7.23032069970845, 7.96165644171779, 8.35185185185185, 8.23263327948304,
    6.52486187845304, 7.21951219512195, 7.40825688073395, 7.74803149606299,
    7.59722222222222, 8.47927656367747, 8.64701436130007, 8.67477592829706,
    6.93237410071942, 7.87311178247734, 8.11830357142857, 8.28571428571429
  ),
  share = c(
    9.59238368157303, 9.42485144608869, 9.26469454935699, 9.07884380703652,
    8.90483213980824, 8.60539148301954, 8.19283090969065, 8.803490414879,
    8.91131010179019, 8.73168832315526, 9.20323794967356, 9.77569644399565,
    8.17925322471276, 9.08834712747471, 9.21476857137597, 9.15257840139337,
    10.0311384508647, 10.2230297801189, 9.34140315188156, 10.081749555502,
    9.54246209549822, 8.65267385380536, 9.61288638544082, 9.92766161736722,
    9.46333816119313, 11.7916428151637, 11.1917076248986, 11.4107074514545,
    10.2240665308909, 10.8796735178176, 11.8328259356068, 10.1008185060422,
    11.5162316247324, 11.0922612071819, 11.4220360607478, 11.0137534484817,
    11.6162798457357, 11.1529428342749, 10.8605329926627, 11.0563426797082,
    10.8707064005501, 10.6723937264978, 11.3460546311905, 11.3817646297641,
    12.3577847634247, 11.8603528718889, 11.6892187349767, 10.7415968228904,
    11.5512810253294, 11.3853790943181, 11.5721431438945, 11.6890448739816,
    11.6577862053377, 11.9296316331656, 11.6820892994844, 12.4539762239334,
    13.6845939590431, 12.9194049051995, 13.0979149627558, 12.4499020359058,
    13.3421243900356, 13.3128704475982, 12.8420007999215, 13.4162502721425,
    14.379016594924, 13.7593487022761, 13.5608079256008, 13.1185377538782,
    14.1754108045561, 14.5181162309553, 14.0728463708406, 14.3472850616437
  ),
)
# Create age ranges: bin age into (25,34], (34,44], (44,54] and (54,65].
# cut() intervals are right-closed by default, so an age of exactly 25 (and
# anything above 65) falls outside every bin and becomes NA.
# NOTE(review): df_final is not defined anywhere in this post; it is assumed
# to hold one row per person with age, gender, education and earnings columns.
age_ranges <- cut(df_final$age, breaks = c(25, 34, 44, 54, 65))

# Group data by age range, gender and education level, and calculate the mean
# earnings. aggregate() names the grouping columns Group.1, Group.2, Group.3
# (in the order of the `by` list) and the aggregated value x.
earnings_data <- aggregate(
  df_final$earnings,
  by = list(age_ranges, df_final$gender, df_final$education),
  FUN = mean
)

# Calculate the percentage of each education level within each
# age range x gender group. Drop the grouping afterwards so that later
# operations on earnings_data are not silently performed per group.
earnings_data <- earnings_data %>%
  group_by(Group.1, Group.2) %>%
  mutate(share = x / sum(x) * 100) %>%
  ungroup()

# Lookup table with one row per education level present in the data
# (levels appear in order of first occurrence in earnings_data).
test_data <- data.frame(Group.3 = unique(earnings_data[["Group.3"]]))

# Attach one colour per education level; colours are matched to the rows
# above purely by position.
test_data[["color"]] <- c(
  "#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c",
  "#FF0000", "#00FF00", "#0000FF", "#FFFF00"
)

# Show the lookup table for inspection
print(test_data)

# Create plot with facets and text labels for education share

# Legend text for the education codes in ascending order (lowest attainment
# first).
education_labels <- c(
  "Mandatory School", "Pre-vocational education",
  "Short apprenticeship commercial shool", "Diploma middle school",
  "Apprenticeship", "Full-time vocational school", "Teacher diploma",
  "Higher professional education", "university degree"
)

# Named colour vector (names = education codes) so that colours are matched to
# levels by name rather than by position.
education_colors <- setNames(test_data$color, test_data$Group.3)

# Highest code first: the first factor level is drawn at the top of the stack
# and of the legend, which puts the lowest level (mandatory school) at the
# bottom of every column.
education_levels <- rev(sort(unique(earnings_data$Group.3)))

p <- ggplot(
  earnings_data,
  aes(x = Group.2, y = x, fill = factor(Group.3, levels = education_levels))
) +
  geom_col(position = "stack") +
  facet_wrap(~Group.1, nrow = 1) +
  labs(
    title = "Mean Earnings by Age Range and Gender",
    x = "Age Range",
    y = "Mean Earnings"
  ) +
  # The legend comes from the bar fill, so its keys are plain colour swatches.
  # Labels are reversed to line up with the reversed factor levels.
  scale_fill_manual(
    values = education_colors,
    name = "Education Level",
    labels = rev(education_labels)
  ) +
  # No colour aesthetic here: mapping colour on geom_text is what produced the
  # "a" glyphs in the legend. Grouping for the stacked label positions is
  # inherited from the fill aesthetic.
  geom_text(
    aes(label = paste(round(share, 1), "%")),
    position = position_stack(vjust = 0.5),
    size = 3.5,
    show.legend = FALSE
  ) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

# Add footnote using ggtext. Caption and caption theme are added in one chain
# so that neither addition overwrites the other.
footnote <- "Note: The first column represents males and the second column represents females."
plot <- p +
  labs(caption = footnote) +
  theme(plot.caption = ggtext::element_markdown())
plot

enter image description here


Solution

  • Here is a way.
    Instead of creating a data.frame with the colors, create a named vector of color codes, with names equal to the unique Group.3 values, and a vector of legend text values. Coerce Group.3 to a factor whose levels are the colors' names in reversed order (highest to lowest), so that the lowest level is stacked at the bottom of each column. Also reverse the legend text so that it matches the reversed levels.

    suppressPackageStartupMessages({
      library(dplyr)
      library(ggplot2)
    })
    
    # Education codes present in the data, in ascending order
    fill_levels <- sort(unique(earnings_data$Group.3))
    
    # One colour per education code, named by the code it belongs to
    fill_colors <- setNames(
      c(
        "#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c",
        "#FF0000", "#00FF00", "#0000FF", "#FFFF00"
      ),
      fill_levels
    )
    
    # Legend text, in the same ascending order as fill_levels
    fill_labels <- c(
      "Mandatory School", "Pre-vocational education",
      "Short apprenticeship commercial shool", "Diploma middle school",
      "Apprenticeship", "Full-time vocational school", "Teacher diploma",
      "Higher professional education", "university degree"
    )
    
    # Caption text shown below the plot
    footnote <- "Note: The first column represents males and the second column represents females."
    
    # Stacked bars of x per Group.2 column, one facet per age range.
    # Group.3 becomes a factor with reversed levels: the first level is drawn
    # at the top of the stack, so the lowest code ends up at the bottom.
    earnings_data %>%
      mutate(Group.3 = factor(Group.3, levels = rev(fill_levels))) %>%
      ggplot(aes(x = Group.2, y = x, fill = Group.3)) +
      geom_col(position = "stack") +
      facet_wrap(~Group.1, nrow = 1) +
      # The fill encodes the education level (Group.3), so the legend is
      # titled accordingly; labels are reversed to match the reversed levels.
      scale_fill_manual(
        name = "Education Level",
        values = fill_colors,
        labels = rev(fill_labels)
      ) +
      labs(
        title = "Mean Earnings by Age Range and Gender",
        x = "Age Range",
        y = "Mean Earnings",
        caption = footnote
      ) +
      theme(
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.caption = ggtext::element_markdown()
      )
    

    Created on 2023-04-14 with reprex v2.0.2