Search code examples
r ggplot2 facet-wrap

How to limit the number of factors in facet_wrap() to plot for only top N factors?


I am working with district-level Covid data and trying to plot a time series of cases for each district using facet_wrap().

Some states have so many districts that they won't all fit in one plot, so I want to limit facet_wrap() to the top N districts.

I have tried reordering the facets with fct_reorder() on District, but that only changes the order; the plot still includes every district.

Is there a way to take the top N levels of the reordered District factor and plot only those, or is there an option to control the number of facets in facet_wrap()?

df:

library(tidyverse)
library(lubridate)

# Raw CSV of the district-level data, hosted on GitHub.
file_url <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/df_districts.csv"

# Read the CSV through a url() connection and parse the Date column with
# lubridate::ymd(), all in one pipeline.
df_districts <- file_url %>%
  url() %>%
  read.csv() %>%
  mutate(Date = ymd(Date))

I have tried:

df_districts %>%
  # All conditions live in a single filter() call, so max(Date) is evaluated
  # on the data as piped in, not on the Rajasthan subset.
  filter(
    State == "Rajasthan" &
      Date != max(Date) &
      !is.na(Daily_confirmed)
  ) %>%

  # group_by(District, Date) %>% 
  # slice_max(order_by = Daily_confirmed, n = 20) %>% 

  # Reorder the District levels by each district's maximum Daily_confirmed,
  # largest first; this changes facet order only, not which facets appear.
  mutate(
    District = fct_reorder(
      District, Daily_confirmed,
      .fun = max, .desc = TRUE
    )
  ) %>%

  ggplot(mapping = aes(x = Date, y = Daily_confirmed)) +
  geom_line(size = 1) +
  facet_wrap(facets = ~District)

Solution

  • One way is to summarize the table per district by whatever ranking metric you need (e.g., sum(Daily_confirmed)), then keep the top "n" rows by that metric.

    # Total Daily_confirmed per district, keeping the 10 largest totals
    # (slice_max() keeps ties by default, so it may return more than 10 rows).
    df_districts %>%
      group_by(District) %>%
      summarise(daily = sum(Daily_confirmed)) %>%
      slice_max(order_by = daily, n = 10)
    # # A tibble: 10 x 2
    #    District daily
    #    <chr>    <int>
    #  1 Jaipur   99843
    #  2 Jodhpur  72443
    #  3 Kota     39442
    #  4 Alwar    34650
    #  5 Udaipur  31297
    #  6 Bikaner  26144
    #  7 Ajmer    25866
    #  8 Bhilwara 19922
    #  9 Pali     16589
    # 10 Sikar    15031
    

    Using this data, we can left_join the original data back in on District (removing daily first, if desired), and your subset will have just those districts.

    # Rank districts by total Daily_confirmed, keep only the names of the
    # top 10, then join every original row for those districts back on.
    out <- df_districts %>%
      group_by(District) %>%
      summarise(daily = sum(Daily_confirmed)) %>%
      slice_max(order_by = daily, n = 10) %>%
      select(District) %>%
      left_join(y = df_districts, by = "District")
    out
    # # A tibble: 3,660 x 11
    #    District Date       State     Confirmed Recovered Deceased Other Tested Daily_confirmed Daily_Recovered Daily_Deceased
    #    <chr>    <date>     <chr>         <int>     <int>    <int> <int>  <int>           <int>           <int>          <int>
    #  1 Jaipur   2021-04-27 Rajasthan    100651     68325      659     0 968783            3289             989             21
    #  2 Jaipur   2021-04-26 Rajasthan     97362     67336      638     0 968783            2878             961             11
    #  3 Jaipur   2021-04-25 Rajasthan     94484     66375      627     0 968783            3145             648             13
    #  4 Jaipur   2021-04-24 Rajasthan     91339     65727      614     0 968783            3260             657              9
    #  5 Jaipur   2021-04-23 Rajasthan     88079     65070      605     0 968783            3036             767             13
    #  6 Jaipur   2021-04-22 Rajasthan     85043     64303      592     0 968783            2317             658             11
    #  7 Jaipur   2021-04-21 Rajasthan     82726     63645      581     0 968783            3101             551              5
    #  8 Jaipur   2021-04-20 Rajasthan     79625     63094      576     0 968783            1875             413              9
    #  9 Jaipur   2021-04-19 Rajasthan     77750     62681      567     0 968783            2011             528             11
    # 10 Jaipur   2021-04-18 Rajasthan     75739     62153      556     0 968783            1963             258              4
    # # ... with 3,650 more rows
    

    For comparison,

    # Number of rows per District value in the original data.
    table(df_districts[["District"]])
    #          Ajmer          Alwar       Banswara          Baran         Barmer      Bharatpur       Bhilwara        Bikaner 
    #            366            366            366            362            366            366            366            366 
    #       BSF Camp          Bundi    Chittorgarh          Churu          Dausa        Dholpur      Dungarpur       Evacuees 
    #            356            335            366            366            366            366            366            366 
    #     Ganganagar    Hanumangarh       Italians         Jaipur      Jaisalmer         Jalore       Jhalawar      Jhunjhunu 
    #            342            366            366            366            366            356            366            366 
    #        Jodhpur        Karauli           Kota         Nagaur    Other State           Pali     Pratapgarh      Rajsamand 
    #            366            366            366            366            366            366            366            366 
    # Sawai Madhopur          Sikar         Sirohi           Tonk        Udaipur 
    #            366            366            355            366            366 
    
    # Number of rows per District value in the top-10 subset.
    table(out[["District"]])
    #    Ajmer    Alwar Bhilwara  Bikaner   Jaipur  Jodhpur     Kota     Pali    Sikar  Udaipur 
    #      366      366      366      366      366      366      366      366      366      366