Search code examples
r colors bar-chart

Change the color of different values within a variable in ggplot


I am working on a ggplot that shows the time from a surgery until a patient is able to move around. I want each value of "mobility_categories" to be a distinct color. Here is my code:

# Faceted bar chart: share of each mobility category, one panel per
# Diabetes_status level, with a percentage label above every bar.
# Note: no `fill` aesthetic is mapped anywhere in this pipeline, so a
# scale_fill_manual() added to it would have nothing to act on.
DiabetesAndCRC3_only_rectal %>% 
    # Drop rows whose mobility category is missing before plotting.
    filter(!is.na(mobility_categories)) %>% 
    # group = Diabetes_status: stat_count's `prop` is a per-group proportion,
    # so this presumably makes the bars in each panel sum to 100% -- confirm
    # against the rendered plot.
    ggplot(aes(x=factor(mobility_categories), group = Diabetes_status)) +
    # Bar height is the computed proportion rather than the raw count.
    geom_bar(aes(y = after_stat(prop)),
             stat="count", width = 0.8) +
    # Same stat as the bars, so each label sits at its bar's height;
    # vjust = -.5 nudges the text above the bar.
    geom_text(aes(label = scales::percent(after_stat(prop)), y= after_stat(prop)),
              stat= "count", vjust = -.5)+
    # One panel (column) per Diabetes_status level.
    facet_grid(~Diabetes_status) +
    # Percent-formatted y axis with no padding, fixed to the range 0-50%.
    scale_y_continuous(labels = scales::percent, expand = c(0,0), limits = c(0,0.5))

enter image description here

The data is grouped by "Diabetes_status", and I think this is why I'm having issues. Normally I would just use scale_fill_manual, but this is not working.

Data:

structure(list(mobility_categories = c(">24", ">48", "<24", ">48", ">24", ">48", ">48", "<24", "<24", ">48", ">48", ">24", ">24", "<24", "<24", ">36", ">24", ">48", "<24", ">36", ">48", "<24", ">48", ">24", "<24", NA, ">48", ">48", "<24", NA, "<24", ">24", ">48", ">24", "<24", ">48", ">24", ">24", ">24", ">36"), Diabetes_status = structure(c(1L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L), levels = c("Normal HbA1c", "High HbA1c", "T2D"), class = "factor")) 

Solution

  • Here is a suggestion for how to do it. The difference is that we calculate the proportions beforehand, so that `fill` can be mapped directly to `mobility_categories`:

    library(ggplot2)
    library(dplyr)
    library(scales)
    
    # Example data for the plot below: a 40-row data frame with two character
    # columns. `mobility_categories` contains two NA entries; `Diabetes_status`
    # is stored as plain strings here rather than as a factor.
    df <- structure(list(mobility_categories = c(">24", ">48", "<24", ">48", 
                                                 ">24", ">48", ">48", "<24", "<24", ">48", ">48", ">24", ">24", 
                                                 "<24", "<24", ">36", ">24", ">48", "<24", ">36", ">48", "<24", 
                                                 ">48", ">24", "<24", NA, ">48", ">48", "<24", NA, "<24", ">24", 
                                                 ">48", ">24", "<24", ">48", ">24", ">24", ">24", ">36"), 
                         Diabetes_status = c("High HbA1c", "T2D", "T2D", "Normal HbA1c", 
                                             "T2D", "Normal HbA1c", "T2D", "High HbA1c", "Normal HbA1c", 
                                             "High HbA1c", "Normal HbA1c", "Normal HbA1c", "T2D", "Normal HbA1c", 
                                             "High HbA1c", "Normal HbA1c", "Normal HbA1c", "T2D", "Normal HbA1c", 
                                             "High HbA1c", "Normal HbA1c", "T2D", "Normal HbA1c", "T2D", "High HbA1c", 
                                             "T2D", "High HbA1c", "High HbA1c", "T2D", "High HbA1c", "High HbA1c", 
                                             "T2D", "T2D", "Normal HbA1c", "High HbA1c", "High HbA1c", "Normal HbA1c", 
                                             "High HbA1c", "Normal HbA1c", "Normal HbA1c")), 
                    class = "data.frame", row.names = c(NA, -40L))
    
    # Summarise the data for plotting: one row per
    # (Diabetes_status, mobility_categories) pair, holding the number of
    # observations (`count`) and that count's share within its
    # Diabetes_status (`prop`). Rows with a missing mobility category are
    # excluded, so the proportions within each status sum to 1.
    df_summary <- df %>%
      filter(!is.na(mobility_categories)) %>%
      group_by(Diabetes_status, mobility_categories) %>%
      # .groups = "drop" states the output grouping explicitly: the result is
      # ungrouped, so no separate ungroup() step is needed and summarize() does
      # not print its "has grouped output by ..." message.
      summarize(count = n(), .groups = "drop") %>%
      # Regroup by status only, so sum(count) is the per-status total.
      group_by(Diabetes_status) %>%
      mutate(prop = count / sum(count)) %>%
      # Hand back an ungrouped table so later verbs are not silently applied
      # per Diabetes_status.
      ungroup()
    
    # Plot the pre-computed proportions: one bar per mobility category, filled
    # with a fixed colour per category, one panel per Diabetes_status.
    ggplot(
      data = df_summary,
      mapping = aes(x = mobility_categories, y = prop, fill = mobility_categories)
    ) +
      # Heights come straight from `prop`, so no counting stat is involved.
      geom_col(width = 0.8) +
      # Percentage label centred vertically inside each bar.
      geom_text(
        mapping = aes(label = scales::percent(prop)),
        position = position_stack(vjust = 0.5),
        size = 3
      ) +
      # Panels laid out as columns; each panel gets its own x scale.
      facet_grid(cols = vars(Diabetes_status), scales = "free_x") +
      scale_y_continuous(labels = scales::percent) +
      # Explicit category -> colour mapping, keyed by the category label.
      scale_fill_manual(
        values = c(">24" = "red", ">48" = "blue", "<24" = "green", ">36" = "orange")
      )
    

    enter image description here