Search code examples
r

Change Legend Order for calendR Plot with Dynamic Number of Categories


I am generating calendar plots using calendR package to showcase the Air Quality Index (AQI) for each day. I'm trying to order my legend in calendR to be a specified order to no avail. The legend needs to be in order of health severity as follows:

  1. Good
  2. Moderate
  3. Unhealthy for sensitive groups
  4. Unhealthy
  5. Very unhealthy
  6. Hazardous

Unfortunately the legend is appearing in alphabetical order which could confuse viewers into thinking that the Air Quality is worse or better than it actually is.

I've tried several options (see below) and referenced primarily https://github.com/R-CoderDotCom/calendR in Option A-C attempts. I was looking at the "Colors order" section "Way 2". "Way 1" on that website I can't figure out how to work since each plot for the various communities I'm making a plot for has different number of unique AQI categories.

I also tried adding scale_fill_manual() to the plot as suggested on Reorder legend levels in CalendR by @Stefan in Option D, this ordered options correctly for communities with all AQI categories (community 1 in example data) but didn't work for communities with less AQI categories (community 2 in my example data). Perhaps there is a way to modify scale_fill_manual() so it is more dynamic and can accommodate unique combos of AQI categories?

In attempts to make the scale_fill_manual() path in Option D more dynamic I tried doing what I did to dynamically change the number of colors in my palette to the legend names in Option E, but that ended up sorting the legend levels in a strange way.

I suspect Option D with some way of dynamically changing number of legend names is the correct avenue, but need help in doing that.

I want my graph to look like below and work for communities that might not have every AQI category like community 2 in my example data.

option D

Data and Libraries to Use

library(calendR); library(tidyverse)    

# Community 1 data (Oct 25 - Dec 31)
community1_data <- data.frame(
  julian = 299:366,
  PM25 = c(3.4,1.3,1.2,1.2,0.4,3.4,1.0, #october data
           0.8,0.3,13.5,0.9,5.3,4.4,3.4,98.6,0.7,350.6,0.8,0.3,0.9,0.9,4.1,0.7,0.3,0.4,2.1,1.4,5.2,4.2,3.9,1.4,0.8,0.7,0.8,0.3,1.9,0.8, #november data
           0.7,1.2,1.7,67.9,3.8,6.1,5.9,225.3,0.7,0.3,0.6,2.9,37.5,1.1,33.2,0.9,1.5,1.1,0.8,1.5,0.8,2.2,4.6,1.2,1.0,3.3,0.9,0.9,4.6,1.2,2.8 #december data
  ),
  site_name = "community 1"
)

# Community 2 data (Mar 9 - Dec 31)
community2_data <- data.frame(
  julian = 69:366,
  PM25 = c(1.6,1.5,3.4,5.8,5.1,2.6,5.4,2.8,2.5,3.7,6.2,4.8,rep(NA,length(69:366)-12)),
  site_name = "community 2"
)

# Combine both communities
data <- rbind(community1_data, community2_data)

#create AQI category for each PM25 value
data$AQI <- case_when(
  data$PM25 <= 9.0 ~ "1",
  data$PM25 >= 9.1 & data$PM25 <= 35.4 ~ "2",
  data$PM25 >= 35.5 & data$PM25 <= 55.4 ~ "3",
  data$PM25 >= 55.5 & data$PM25 <= 125.4 ~ "4",
  data$PM25 >= 125.5 & data$PM25 <= 225.4 ~ "5",
  data$PM25 >= 225.5 ~ "6",
  TRUE ~ NA_character_
)

Option A to get the legend ordered

##################OPTION A########################
plot_calendar <- function(df, community, year){ # make use of a function
  
  #create objects with days in each AQI category
  filtered_data <- data %>% filter(site_name == community)
  
  #create event list for all days of the year
  days_in_year <- as.numeric(format(as.Date(paste(year, "12", "31", sep="-")), "%j"))
  events <- rep(NA, days_in_year)
  events[filtered_data$julian] <- case_when(
    filtered_data$AQI == "1" ~ "Good",
    filtered_data$AQI == "2" ~ "Moderate",
    filtered_data$AQI == "3" ~ "Unhealthy for sensitive groups",
    filtered_data$AQI == "4" ~ "Unhealthy",
    filtered_data$AQI == "5" ~ "Very unhealthy",
    filtered_data$AQI == "6" ~ "Hazardous",
    TRUE ~ NA_character_
  )
  
  #create color palette
  all_categories <- c("1", "2", "3", "4", "5", "6")
  colors <- c("Good"="green","Moderate" = "yellow", "Unhealthy for sensitive groups" = "orange", "Unhealthy" = "red","Very unhealthy" = "purple", "Hazardous" = "maroon")
  palette <- colors[match(unique(filtered_data$AQI[!is.na(filtered_data$AQI)]), all_categories)]
  
  #create plot
  calendR(year = year,
          start = "M",
          special.days = events,
          special.col = palette,
          low.col = "white",
          legend.pos = "right",
          legend.title = "Air Quality Index",
          title = paste(community, "AQI in", year),
          mbg.col = "lightgray",
          months.col = "white",
          weeknames = c("M","T","W","T","F","S","S")) # this returns the calendar plot 
  
}

plot_calendar(data, "community 1", 2024)
plot_calendar(data, "community 2", 2024)

Option A - community 1 Option A - community 2

Option B to get legend in order

#############Option B##################
plot_calendar <- function(df, community, year){ # make use of a function
  
  #create objects with days in each AQI category
  filtered_data <- data %>% filter(site_name == community)
  
  #create event list for all days of the year
  days_in_year <- as.numeric(format(as.Date(paste(year, "12", "31", sep="-")), "%j"))
  events <- rep(NA, days_in_year)
  events[filtered_data$julian] <- case_when(
    filtered_data$AQI == "1" ~ "Good",
    filtered_data$AQI == "2" ~ "Moderate",
    filtered_data$AQI == "3" ~ "Unhealthy for sensitive groups",
    filtered_data$AQI == "4" ~ "Unhealthy",
    filtered_data$AQI == "5" ~ "Very unhealthy",
    filtered_data$AQI == "6" ~ "Hazardous",
    TRUE ~ NA_character_
  )
  
  #create color palette
  all_categories <- c("1", "2", "3", "4", "5", "6")
  colors <- c("Good"="green","Moderate" = "yellow", "Unhealthy for sensitive groups" = "orange", "Unhealthy" = "red","Very unhealthy" = "purple", "Hazardous" = "maroon")
  palette <- colors[match(unique(filtered_data$AQI[!is.na(filtered_data$AQI)]), all_categories)]
  desired_order <- c("green","yellow","orange","red","purple","maroon")
  palette <- palette[order(match(palette, desired_order))]
  
  #create plot
  calendR(year = year,
          start = "M",
          special.days = events,
          special.col = palette,
          low.col = "white",
          legend.pos = "right",
          legend.title = "Air Quality Index",
          title = paste(community, "AQI in", year),
          mbg.col = "lightgray",
          months.col = "white",
          weeknames = c("M","T","W","T","F","S","S")) # this returns the calendar plot 
  
}

plot_calendar(data, "community 1", 2024)
plot_calendar(data, "community 2", 2024)

Option B - community 1 Option B - community 2

Option C to get legend in order

#############Option C##################
plot_calendar <- function(df, community, year){ # make use of a function
  
  #create objects with days in each AQI category
  filtered_data <- data %>% filter(site_name == community)
  
  #create event list for all days of the year
  days_in_year <- as.numeric(format(as.Date(paste(year, "12", "31", sep="-")), "%j"))
  events <- rep(NA, days_in_year)
  events[filtered_data$julian] <- case_when(
    filtered_data$AQI == "1" ~ "Good",
    filtered_data$AQI == "2" ~ "Moderate",
    filtered_data$AQI == "3" ~ "Unhealthy for sensitive groups",
    filtered_data$AQI == "4" ~ "Unhealthy",
    filtered_data$AQI == "5" ~ "Very unhealthy",
    filtered_data$AQI == "6" ~ "Hazardous",
    TRUE ~ NA_character_
  )
  
  #create color palette
  all_categories <- c("1", "2", "3", "4", "5", "6")
  desired_order <- c("green","yellow","orange","red","purple","maroon")
  colors <- c("Good"="green","Moderate" = "yellow", "Unhealthy for sensitive groups" = "orange", "Unhealthy" = "red","Very unhealthy" = "purple", "Hazardous" = "maroon")[order(desired_order)]
  palette <- colors[match(unique(filtered_data$AQI[!is.na(filtered_data$AQI)]), all_categories)]
  
  #create plot
  calendR(year = year,
          start = "M",
          special.days = events,
          special.col = palette,
          low.col = "white",
          legend.pos = "right",
          legend.title = "Air Quality Index",
          title = paste(community, "AQI in", year),
          mbg.col = "lightgray",
          months.col = "white",
          weeknames = c("M","T","W","T","F","S","S")) # this returns the calendar plot 
  
}

plot_calendar(data, "community 1", 2024)
plot_calendar(data, "community 2", 2024)

Option C - community 1 Option C - community 2

Option D to get legend in order

this works to get legend in order, but doesn't work for communities that don't have all the AQI categories (like community 2 in my example data) and produces "Error in calendR(year = year, start = "M", special.days = events, special.col = palette2, : The number of colors supplied on 'special.col' argument must be the same of length(unique(na.omit(special.days)))" error message.

#############Option D################
plot_calendar <- function(df, community, year){ # make use of a function
  
  #create objects with days in each AQI category
  filtered_data <- data %>% filter(site_name == community)
  
  #create event list for all days of the year
  days_in_year <- as.numeric(format(as.Date(paste(year, "12", "31", sep="-")), "%j"))
  events <- rep(NA, days_in_year)
  events[filtered_data$julian] <- case_when(
    filtered_data$AQI == "1" ~ "Good",
    filtered_data$AQI == "2" ~ "Moderate",
    filtered_data$AQI == "3" ~ "Unhealthy for sensitive groups",
    filtered_data$AQI == "4" ~ "Unhealthy",
    filtered_data$AQI == "5" ~ "Very unhealthy",
    filtered_data$AQI == "6" ~ "Hazardous",
    TRUE ~ NA_character_
  )
  
  #create color palette
  all_categories <- c("1", "2", "3", "4", "5", "6")
  desired_order <- c("green","yellow","orange","red","purple","maroon")
  colors <- c("Good"="green","Moderate" = "yellow", "Unhealthy for sensitive groups" = "orange", "Unhealthy" = "red","Very unhealthy" = "purple", "Hazardous" = "maroon")[order(desired_order)]
  palette <- colors[match(unique(filtered_data$AQI[!is.na(filtered_data$AQI)]), all_categories)]
  ordered_palette <- setNames(colors, c("Good","Moderate","Unhealthy for sensitive groups","Unhealthy","Very unhealthy","Hazardous"))
  
  #create plot
  calendR(year = year,
          start = "M",
          special.days = events,
          special.col = c(palette),
          low.col = "white",
          legend.pos = "right",
          legend.title = "Air Quality Index",
          title = paste(community, "AQI in", year),
          mbg.col = "lightgray",
          months.col = "white",
          weeknames = c("M","T","W","T","F","S","S")) + # this returns the calendar plot 
  scale_fill_manual(
    values = palette,
    limits = names(ordered_palette),
    na.value = "transparent"
  )
}

plot_calendar(data, "community 1", 2024)
plot_calendar(data, "community 2", 2024) #produces error "Error in calendR(year = year, start = "M", special.days = events, special.col = palette2,  : 
  The number of colors supplied on 'special.col' argument must be the same of length(unique(na.omit(special.days)))"

Option D - community 1

Option E ## sorts legend levels really wonky

###############OPTION E#################
plot_calendar <- function(df, community, year){
  #filter to community of interest
  filtered_data <- data %>% filter(site_name == community)
  #create event list for all days of the year
  days_in_year <- as.numeric(format(as.Date(paste(year, "12", "31", sep="-")), "%j"))
  events <- rep(NA, days_in_year)
  events[filtered_data$julian] <- case_when(
    filtered_data$AQI == "1" ~ "Good",
    filtered_data$AQI == "2" ~ "Moderate",
    filtered_data$AQI == "3" ~ "Unhealthy for sensitive groups",
    filtered_data$AQI == "4" ~ "Unhealthy",
    filtered_data$AQI == "5" ~ "Very unhealthy",
    filtered_data$AQI == "6" ~ "Hazardous",
    TRUE ~ NA_character_
  )
  
  #create color palette
  all_categories <- c("1", "2", "3", "4", "5", "6")
  colors <- c("Good"="green","Moderate" = "yellow", "Unhealthy for sensitive groups" = "orange", "Unhealthy" = "red","Very unhealthy" = "purple", "Hazardous" = "maroon")
  palette <- colors[match(unique(filtered_data$AQI[!is.na(filtered_data$AQI)]), all_categories)]
  ordered_palettenames <- setNames(colors, c("Good","Moderate","Unhealthy for sensitive groups","Unhealthy","Very unhealthy","Hazardous"))
  ordered_palette <- ordered_palettenames[match(unique(filtered_data$AQI[!is.na(filtered_data$AQI)]), all_categories)]
  
  #create plot
  calendR(year = year,
          start = "M",
          special.days = events,
          special.col = palette,
          low.col = "white",
          legend.pos = "right",
          legend.title = "Air Quality Index",
          title = paste(community, "AQI in", year),
          mbg.col = "lightgray",
          months.col = "white",
          weeknames = c("M","T","W","T","F","S","S")) + # this returns the calendar plot 
    scale_fill_manual(
      values = palette,
      limits = names(ordered_palette),
      na.value = "transparent")
}

plot_calendar(data, "community 1", 2024) 
plot_calendar(data, "community 2", 2024) 

Option E - community 1 Option E - community 2


Solution

  • The error is occurring because calendR expects the number of colors to match exactly the number of unique categories present in the data, not all possible categories. So in Community 2 you only have "Good" but the palette is still 6 long, so there is a mismatch. I fixed this by using unique values in events. I also combined the plots with patchwork for this output image, which forced me to "collect" the guides. out

    Code

    library(calendR); library(tidyverse)    
    
    # Community 1 data (Oct 25 - Dec 31)
    community1_data <- data.frame(
      julian = 299:366,
      PM25 = c(3.4,1.3,1.2,1.2,0.4,3.4,1.0, #october data
               0.8,0.3,13.5,0.9,5.3,4.4,3.4,98.6,0.7,350.6,0.8,0.3,0.9,0.9,4.1,0.7,0.3,0.4,2.1,1.4,5.2,4.2,3.9,1.4,0.8,0.7,0.8,0.3,1.9,0.8, #november data
               0.7,1.2,1.7,67.9,3.8,6.1,5.9,225.3,0.7,0.3,0.6,2.9,37.5,1.1,33.2,0.9,1.5,1.1,0.8,1.5,0.8,2.2,4.6,1.2,1.0,3.3,0.9,0.9,4.6,1.2,2.8 #december data
      ),
      site_name = "community 1"
    )
    
    # Community 2 data (Mar 9 - Dec 31)
    community2_data <- data.frame(
      julian = 69:366,
      PM25 = c(1.6,1.5,3.4,5.8,5.1,2.6,5.4,2.8,2.5,3.7,6.2,4.8,rep(NA,length(69:366)-12)),
      site_name = "community 2"
    )
    
    # Combine both communities
    data <- rbind(community1_data, community2_data)
    
    #create AQI category for each PM25 value
    data$AQI <- case_when(
      data$PM25 <= 9.0 ~ "1",
      data$PM25 >= 9.1 & data$PM25 <= 35.4 ~ "2",
      data$PM25 >= 35.5 & data$PM25 <= 55.4 ~ "3",
      data$PM25 >= 55.5 & data$PM25 <= 125.4 ~ "4",
      data$PM25 >= 125.5 & data$PM25 <= 225.4 ~ "5",
      data$PM25 >= 225.5 ~ "6",
      TRUE ~ NA_character_
    )
    
    plot_calendar <- function(df, community, year) {
      # Filter data for the specific community
      filtered_data <- data %>% filter(site_name == community)
      
      # Create event list for all days of the year
      days_in_year <- as.numeric(format(as.Date(paste(year, "12", "31", sep="-")), "%j"))
      events <- rep(NA, days_in_year)
      
      # Define full AQI categories and colors mapping in desired order
      aqi_mapping <- c(
        "1" = "Good",
        "2" = "Moderate",
        "3" = "Unhealthy for sensitive groups",
        "4" = "Unhealthy",
        "5" = "Very unhealthy",
        "6" = "Hazardous"
      )
      
      # Define colors in the same order as categories
      full_color_mapping <- c(
        "Good" = "green",
        "Moderate" = "yellow",
        "Unhealthy for sensitive groups" = "orange",
        "Unhealthy" = "red",
        "Very unhealthy" = "purple",
        "Hazardous" = "maroon"
      )
      
      events[filtered_data$julian] <- aqi_mapping[filtered_data$AQI]
      
      present_categories <- unique(na.omit(events))
      present_colors <- full_color_mapping[present_categories] # Get unique categories present in the data
      print(present_colors)
      # Create the calendar plot
      cal_plot <- calendR(
        year = year,
        start = "M",
        special.days = events,
        special.col = present_colors,  # Use only colors for present categories
        low.col = "white",
        legend.pos = "right",
        legend.title = "Air Quality Index",
        title = paste(community, "AQI in", year),
        mbg.col = "lightgray",
        months.col = "white",
        weeknames = c("M","T","W","T","F","S","S")
      )
      
      # Add the correct color scale with ordered factors
      cal_plot + scale_fill_manual(
        values = full_color_mapping,
        breaks = names(full_color_mapping),  # Force the order
        na.value = "transparent",
        drop = FALSE  # Keep all levels even if not present
      )
    }
    # Create both plots
    p1 <- plot_calendar(data, "community 1", 2024)
    p2 <- plot_calendar(data, "community 2", 2024)
    
    library(patchwork)
    combined_plot <- p1 + p2 + plot_layout(ncol = 2)
    combined_plot