Search code examples
r, variables, graph, boxplot, usmap

Boxplot for 50 States over time


Edited based on answers

I have data on marriage rates for the 50 states over time. I am trying to make an individual box plot for each state and to place these plots on a map of the United States in R. If that is not possible because the map would be too crowded, I would love to know how to place just the minimum or maximum value for each state on the map. Link to data, if interested.

I have my data in R in two forms. I thought the first (long) form would be better for graphing.

 # Read the wide table: one row per state, one column per year.
 # - check.names = FALSE keeps the year columns named "2017", "2016", ...
 # - na.strings turns the "---" placeholders (e.g. Georgia 2013/2014) into NA,
 #   so every year column is read as numeric and can be pivoted together.
 # - strip.white removes padding around unquoted fields (the printed table
 #   suggests at least "California " carries a trailing space).
 marriage <- read.csv(
   file = "~/Desktop/masters.csv",
   header = TRUE,
   sep = ",",
   check.names = FALSE,
   na.strings = c("NA", "---"),
   strip.white = TRUE
 )

 # Reshape to long format: one row per state/year pair, with the year stored
 # as a number rather than as a column name.
 marriagefine <- marriage %>%
   pivot_longer(
     cols = `2017`:`1990`,
     names_to = "year",
     values_to = "rate"
   ) %>%
   mutate(year = as.numeric(year))

This gets R to read my table like this:

> marriagefine
# A tibble: 1,071 x 3
  State    year  rate
  <fct>   <dbl> <dbl>
1 Alabama  2017   7  
2 Alabama  2016   7.1
3 Alabama  2015   7.4
4 Alabama  2014   7.8
5 Alabama  2013   7.8
6 Alabama  2012   8.2
7 Alabama  2011   8.4
8 Alabama  2010   8.2
9 Alabama  2009   8.3
10 Alabama  2008   8.6
# … with 1,061 more rows

The other (wide) form looks like this:

                  State 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1995 1990
1               Alabama  7.0  7.1  7.4  7.8  7.8  8.2  8.4  8.2  8.3  8.6  8.9  9.2  9.2  9.4  9.6  9.9  9.4 10.1 10.8  9.8 10.6
2                Alaska  6.9  7.1  7.4  7.5  7.3  7.2  7.8  8.0  7.8  8.4  8.5  8.2  8.2  8.5  8.1  8.3  8.1  8.9  8.6  9.0 10.2
3               Arizona  5.8  5.9  5.9  5.8  5.4  5.6  5.7  5.9  5.6  6.0  6.4  6.5  6.6  6.7  6.5  6.7  7.6  7.5  8.2  8.8 10.0
4              Arkansas  9.5  9.9 10.0 10.1  9.8 10.9 10.4 10.8 10.7 10.6 12.0 12.4 12.9 13.4 13.4 14.3 14.3 15.4 14.8 14.4 15.3
5           California   6.3  6.5  6.2  6.4  6.5  6.0  5.8  5.8  5.8  6.7  6.2  6.3  6.4  6.4  6.1  6.2  6.5  5.8  6.4  6.3  7.9
6              Colorado  7.3  7.4  6.8  7.1  6.5  6.8  7.0  6.9  6.9  7.4  7.1  7.2  7.6  7.4  7.8    8  8.2  8.3  8.2  9.0  9.8
7           Connecticut  5.6  5.6  5.3  5.4    5  5.2  5.5  5.6  5.9  5.4  5.5  5.5  5.8  5.8  5.5  5.7  5.4  5.7  5.8  6.6  7.9
8              Delaware  5.5  5.6  5.7    6  6.6  5.8  5.2  5.2  5.4  5.5  5.7  5.9  5.9  6.1    6  6.4  6.5  6.5  6.7  7.3  8.4
9  District of Columbia  8.2  8.1  8.2 11.8 10.8  8.4  8.7  7.6  4.7  4.1  4.2    4  4.1  5.2  5.1  5.1  6.2  4.9  6.6  6.1  8.2
10              Florida  7.8  8.1  8.2  7.3    7  7.2  7.4  7.3  7.5  8.0  8.5  8.6  8.9  9.0    9  9.4  9.3  8.9  8.7  9.9 10.9
11              Georgia  6.9  6.8  6.2  ---  ---  6.5  6.6  7.3  6.6  6.0  6.8  7.3  7.0  7.9    7  6.5  6.1  6.8  7.8  8.4 10.3
12               Hawaii 15.3 15.6 15.9 17.7 16.3 17.5 17.6 17.6 17.2 19.1 20.8 21.9 22.6 22.6   22 20.8 19.6 20.6 18.9 15.7 16.4
13                Idaho  7.8  8.1  8.2  8.4  8.2  8.2  8.6  8.8  8.9  9.5 10.0 10.1 10.5 10.8 10.9   11 11.2 10.8 12.1 13.1 13.9

My box plot command based on answers listed below

# One box per state; each state gets its own colour from the rainbow palette.
n_states <- length(unique(marriagefine$State))
state_colours <- rainbow(n_states)

boxplot(
  rate ~ State,
  data = marriagefine,
  main = "Box Plot for Marriage Rates by State",
  xlab = "States",
  ylab = "Rates",
  col = state_colours
)

How would I overlay each box plot and/or the minimum/maximum values for each state onto the US map? I know this is the basic outline:

library(usmap)

# Outline call with every argument written out, one per line. No data are
# supplied yet (`data` is an empty data frame), so this is only a template.
plot_usmap(
  regions = c("states", "state", "counties", "county"),
  include = NULL,
  exclude = NULL,
  data = data.frame(),
  values = "values",
  labels = FALSE,
  label_color = "black"
)

Solution

  • This calls for a shiny solution:

    # Attach the packages used below. library() stops with an error when a
    # package is not installed, whereas require() only returns FALSE and lets
    # the script fail later with a less helpful message.
    library(shiny)
    library(data.table)
    library(ggplot2)
    library(RColorBrewer)
    library(ggrepel)
    
    # Reshape the data.
    # "---" marks a missing rate in the CSV; reading it as NA keeps every year
    # column numeric instead of turning the whole column into text.
    marriage <- fread("masters.csv", header = TRUE,
        na.strings = c("NA", "---"))
    # Wide -> long: one row per State/year pair.
    marriage <- melt(marriage, id.vars = "State",
        variable.name = "year", value.name = "rate")
    # melt() returns the former column names as a factor; convert via character
    # so the result is the year itself, not the factor level index.
    marriage[, year := as.numeric(as.character(year))]
    # Drop state/years without a rate so they cannot distort the colour limits
    # or show up as empty labels on the map.
    marriage <- marriage[!is.na(rate)]
    # map_data("state") uses lower-case region names, so match on those.
    marriage[, State := tolower(State)]
    # Note: map_data() needs the 'maps' package to be installed.
    states_map <- map_data("state")
    # Attach the state centre coordinates (x, y) and abbreviation. This is an
    # inner join, so rows whose State is not in state.name (e.g. District of
    # Columbia) are dropped.
    marriage <- merge(data.table(data.frame(state.center),
        state.abb, State = tolower(state.name)), marriage, by = "State")

    # Pick a fixed colour palette: the limits span all years so that a colour
    # means the same rate whichever year is displayed.
    myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
    sc <- scale_fill_gradientn(colours = myPalette(100),
        limits = range(marriage$rate, na.rm = TRUE))
    
    # User interface: a title, a year slider in the sidebar and the map in the
    # main panel. The slider spans the first to the last year in the data and
    # starts at the earliest one.
    ui <- fluidPage(
        titlePanel(title = "Marriage"),
        sidebarLayout(
            sidebarPanel = sidebarPanel(
                sliderInput(
                    inputId = "year",
                    label = "Year",
                    min = min(marriage$year),
                    max = max(marriage$year),
                    value = min(marriage$year),
                    step = 1
                )
            ),
            mainPanel = mainPanel(
                plotOutput(outputId = "box", height = "800px")
            )
        )
    )
    
    # Server logic: draw the state map for the year chosen on the slider.
    #
    # input  - Shiny inputs; only input$year is read.
    # output - Shiny outputs; output$box receives the rendered map.
    server <- function(input, output) {
        output$box <- renderPlot({
            req(input$year)
            DT <- marriage[year == input$year]
            # The slider moves in steps of one year, but the data only cover
            # the years that are columns of the CSV (1990, 1995, 1999-2017), so
            # a selection can be empty. Show a message instead of a blank or
            # broken plot in that case.
            validate(need(nrow(DT) > 0,
                paste("No marriage rates recorded for", input$year)))
            ggplot(DT, aes(map_id = State)) +
                # fill each state polygon by its rate
                geom_map(aes(fill = rate), map = states_map) +
                # geom_map sets no x/y limits itself; take them from the map
                expand_limits(x = states_map$long, y = states_map$lat) +
                sc +
                # print the rate at each state centre, nudged to avoid overlap
                geom_text_repel(data = DT, aes(x = x, y = y, label = rate),
                    size = 10)
        })
    }
    
    # Combine the UI definition and the server function into the app object
    shinyApp(ui, server)
    
    

    In reply to the request: A static version with two plots with the maxima and minima for each state next to each other:

    # Load packages. library() stops with an error when a package is not
    # installed, whereas require() only returns FALSE and lets the script fail
    # later with a less helpful message.
    library(data.table)
    library(ggplot2)
    library(RColorBrewer)
    library(ggrepel)
    library(cowplot)
    
    # Reshape the data.
    # "---" marks a missing rate in the CSV; reading it as NA keeps every year
    # column numeric. If rate were text, the later sort by rate would be
    # alphabetical ("10.1" before "9.9") and the maxima/minima would be wrong.
    marriage <- fread("masters.csv", header = TRUE,
        na.strings = c("NA", "---"))
    # Wide -> long: one row per State/year pair.
    marriage <- melt(marriage, id.vars = "State",
        variable.name = "year", value.name = "rate")
    # melt() returns the former column names as a factor; convert via character
    # so the result is the year itself, not the factor level index.
    marriage[, year := as.numeric(as.character(year))]
    # Drop state/years without a rate so a missing value can never be picked
    # as a state's minimum or maximum.
    marriage <- marriage[!is.na(rate)]
    # map_data("state") uses lower-case region names, so match on those.
    marriage[, State := tolower(State)]
    # Note: map_data() needs the 'maps' package to be installed.
    states_map <- map_data("state")
    # Attach the state centre coordinates (x, y) and abbreviation. This is an
    # inner join, so rows whose State is not in state.name (e.g. District of
    # Columbia) are dropped.
    marriage <- merge(data.table(data.frame(state.center),
        state.abb, State = tolower(state.name)), marriage, by = "State")

    # Pick a fixed colour palette: both maps share the same limits so that a
    # colour means the same rate in each of them.
    myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
    sc <- scale_fill_gradientn(colours = myPalette(100),
        limits = range(marriage$rate, na.rm = TRUE))
    
    # Key the table by State and rate, which also sorts it in that order
    setkeyv(marriage, c("State", "rate"))

    # Within each State the rows are now ordered by rate, so the first row is
    # the smallest rate and the last row the largest. When several years share
    # the extreme value, only one of them is kept.
    DT.min <- marriage[, .SD[1L], by = State]
    DT.max <- marriage[, .SD[.N], by = State]
    
    # Use a blank theme (no axes or grid) for all following plots
    theme_set(theme_void())

    # Draw one map of per-state rates, labelling each state centre with
    # "rate (year)". Both maps below are built by this single helper so they
    # cannot drift apart.
    #
    # dt    - table with one row per State and the columns State, x, y, rate
    #         and year.
    # title - plot title.
    # Uses the states_map outlines and the shared fill scale sc defined above.
    # Returns a ggplot object.
    plot_state_rates <- function(dt, title) {
        ggplot(dt, aes(map_id = State)) +
            geom_map(aes(fill = rate), map = states_map) +
            # geom_map sets no x/y limits itself; take them from the map
            expand_limits(x = states_map$long, y = states_map$lat) +
            sc +
            geom_text_repel(data = dt, aes(x = x, y = y,
                label = paste0(rate, "\n(", year, ")")), size = 3.5) +
            ggtitle(title) +
            # centre the title above the map
            theme(plot.title = element_text(hjust = 0.5))
    }

    # generate plot of maximum and minimum rates by State
    p1 <- plot_state_rates(DT.max,
        "Maximum marriage rate 1990-2017 \nby State (year measured)")
    p2 <- plot_state_rates(DT.min,
        "Minimum marriage rate 1990-2017 \nby State (year measured)")
    
    # Arrange the maximum and minimum maps side by side in a single row
    cowplot::plot_grid(plotlist = list(p1, p2), ncol = 2)