Edited based on answers
I have data on marriage rates for the 50 states over time. I am trying to make individual box plots for each state and be able to also put these plots on a map of the states in R. If that is not possible, due to congestion, I would love to know how to place just the min or max values for each state on a map. Link to Data if interested
I had my data listed in R two ways the first way I thought would be better in terms of graphing.
marriage<-read.csv(file="~/Desktop/masters.csv", header=T, sep=",",check.names=FALSE)
marriagefine <-
marriage %>%
pivot_longer(
cols = `2017`:`1990`,
names_to = 'year',
values_to = 'rate'
) %>%
mutate(
year = as.numeric(year)
)
Which gets R to read my table something like this;
> marriagefine
# A tibble: 1,071 x 3
State year rate
<fct> <dbl> <dbl>
1 Alabama 2017 7
2 Alabama 2016 7.1
3 Alabama 2015 7.4
4 Alabama 2014 7.8
5 Alabama 2013 7.8
6 Alabama 2012 8.2
7 Alabama 2011 8.4
8 Alabama 2010 8.2
9 Alabama 2009 8.3
10 Alabama 2008 8.6
# … with 1,061 more rows
The other way to read it
State 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1995 1990
1 Alabama 7.0 7.1 7.4 7.8 7.8 8.2 8.4 8.2 8.3 8.6 8.9 9.2 9.2 9.4 9.6 9.9 9.4 10.1 10.8 9.8 10.6
2 Alaska 6.9 7.1 7.4 7.5 7.3 7.2 7.8 8.0 7.8 8.4 8.5 8.2 8.2 8.5 8.1 8.3 8.1 8.9 8.6 9.0 10.2
3 Arizona 5.8 5.9 5.9 5.8 5.4 5.6 5.7 5.9 5.6 6.0 6.4 6.5 6.6 6.7 6.5 6.7 7.6 7.5 8.2 8.8 10.0
4 Arkansas 9.5 9.9 10.0 10.1 9.8 10.9 10.4 10.8 10.7 10.6 12.0 12.4 12.9 13.4 13.4 14.3 14.3 15.4 14.8 14.4 15.3
5 California 6.3 6.5 6.2 6.4 6.5 6.0 5.8 5.8 5.8 6.7 6.2 6.3 6.4 6.4 6.1 6.2 6.5 5.8 6.4 6.3 7.9
6 Colorado 7.3 7.4 6.8 7.1 6.5 6.8 7.0 6.9 6.9 7.4 7.1 7.2 7.6 7.4 7.8 8 8.2 8.3 8.2 9.0 9.8
7 Connecticut 5.6 5.6 5.3 5.4 5 5.2 5.5 5.6 5.9 5.4 5.5 5.5 5.8 5.8 5.5 5.7 5.4 5.7 5.8 6.6 7.9
8 Delaware 5.5 5.6 5.7 6 6.6 5.8 5.2 5.2 5.4 5.5 5.7 5.9 5.9 6.1 6 6.4 6.5 6.5 6.7 7.3 8.4
9 District of Columbia 8.2 8.1 8.2 11.8 10.8 8.4 8.7 7.6 4.7 4.1 4.2 4 4.1 5.2 5.1 5.1 6.2 4.9 6.6 6.1 8.2
10 Florida 7.8 8.1 8.2 7.3 7 7.2 7.4 7.3 7.5 8.0 8.5 8.6 8.9 9.0 9 9.4 9.3 8.9 8.7 9.9 10.9
11 Georgia 6.9 6.8 6.2 --- --- 6.5 6.6 7.3 6.6 6.0 6.8 7.3 7.0 7.9 7 6.5 6.1 6.8 7.8 8.4 10.3
12 Hawaii 15.3 15.6 15.9 17.7 16.3 17.5 17.6 17.6 17.2 19.1 20.8 21.9 22.6 22.6 22 20.8 19.6 20.6 18.9 15.7 16.4
13 Idaho 7.8 8.1 8.2 8.4 8.2 8.2 8.6 8.8 8.9 9.5 10.0 10.1 10.5 10.8 10.9 11 11.2 10.8 12.1 13.1 13.9
My box plot command based on answers listed below
boxplot(rate ~ State, data = marriagefine,
main="Box Plot for Marriage Rates by State",
xlab="States", ylab="Rates",
col=rainbow(length(unique(marriagefine$State))))
How would I overlay each box plot and or minimum/maximum values for each plot onto the us map? I know this is the basic outline.
library(usmap)
plot_usmap(regions = c("states", "state", "counties", "county"),
include = c(), exclude = c(), data = data.frame(),
values = "values", labels = FALSE,
label_color = "black")
This calls for a shiny solution:
lapply(c("shiny", "data.table", "ggplot2", "RColorBrewer", "ggrepel"),
require, character.only = TRUE)
# mangle data
marriage <- fread("masters.csv", header = TRUE)
marriage <- melt(marriage, id.vars = "State")
marriage$variable <- as.numeric(as.character(marriage$variable ))
setnames(marriage, c("State", "year", "rate"))
marriage$State <- tolower(marriage$State)
states_map <- map_data("state")
marriage <- merge(data.table(data.frame(state.center),
state.abb, State=tolower(state.name)), marriage, by="State")
# pick fixed color palette
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_fill_gradientn(colours = myPalette(100),
limits = range(marriage$rate))
# Define UI
ui <- fluidPage(
titlePanel("Marriage"),
sidebarLayout(
sidebarPanel(
sliderInput("year", "Year", min(marriage$year),
max(marriage$year), value=min(marriage$year), step = 1)
),
mainPanel(
plotOutput(outputId = "box", height = "800px")
)
)
)
# Define server function
server <- function(input, output) {
output$box <- renderPlot({
req(input$year)
DT <- marriage[year==input$year]
ggplot(DT, aes(map_id = State)) +
geom_map(aes(fill = rate), map = states_map) +
expand_limits(x = states_map$long, y = states_map$lat) +
sc +
geom_text_repel(data=DT, aes(x=x, y=y, label = rate), size=10)
})
}
# Create Shiny object
shinyApp(ui = ui, server = server)
In reply to the request: A static version with two plots with the maxima and minima for each state next to each other:
# Load packages
lapply(c("data.table", "ggplot2", "RColorBrewer", "ggrepel", "cowplot"),
require, character.only = TRUE)
# mangle data
marriage <- fread("masters.csv", header = TRUE)
marriage <- melt(marriage, id.vars = "State")
marriage$variable <- as.numeric(as.character(marriage$variable ))
setnames(marriage, c("State", "year", "rate"))
marriage$State <- tolower(marriage$State)
states_map <- map_data("state")
marriage <- merge(data.table(data.frame(state.center),
state.abb, State=tolower(state.name)), marriage, by = "State")
# pick fixed color palette
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_fill_gradientn(colours = myPalette(100),
limits = range(marriage$rate))
# sort by State and rate
setkeyv(marriage, c("State", "rate"))
# pick year with largest and smallest rate (could be one of several)
DT.max <- marriage[, tail(.SD, 1), by = State]
DT.min <- marriage[, head(.SD, 1), by = State]
theme_set(theme_void())
# generate plot of maximum and minimum rates by State
p1 <- ggplot(DT.max, aes(map_id = State)) +
geom_map(aes(fill = rate), map = states_map) +
expand_limits(x = states_map$long, y = states_map$lat) +
sc +
geom_text_repel(data=DT.max, aes(x=x, y=y,
label = paste0(rate, "\n(",year,")")), size=3.5) +
ggtitle("Maximum marriage rate 1990-2017 \nby State (year measured)") +
theme(plot.title = element_text(hjust = 0.5))
p2 <- ggplot(DT.min, aes(map_id = State)) +
geom_map(aes(fill = rate), map = states_map) +
expand_limits(x = states_map$long, y = states_map$lat) +
sc +
geom_text_repel(data=DT.min, aes(x=x, y=y,
label = paste0(rate, "\n(",year,")")), size=3.5) +
ggtitle("Minimum marriage rate 1990-2017 \nby State (year measured)") +
theme(plot.title = element_text(hjust = 0.5))
# plot plots next to each other
cowplot::plot_grid(p1, p2, ncol=2)