Search code examples
rggplot2geom-bar

Align max & min percent changes in a geom_bar plot


I am not sure if this is possible in ggplot or not, but I have been tinkering with this for quite some time and am unable to figure it out.

I am trying to create a plot that mimics this layout: enter image description here

I am pretty sure I have my data formatted correctly. This is what I have done so far:

library(tidyverse)
library(reshape2)
library(lubridate)

rental.data.melted <- melt(rental_data)

rental.data.melted <- rental.data.melted %>%
  slice(217:10908)

rental.data.melted <- rental.data.melted %>%
  rename(date = variable)

rental.data.melted$date <- lubridate::ym(rental.data.melted$date)

rental.one.year <- rental.data.melted %>%
  filter(year(date) >= 2021 & month(date) >= 3)

rental.one.year <- rental.one.year %>%
  group_by(RegionName) %>%
  mutate(prev_rent = lag(value),
         pct.chg = (value / prev_rent - 1) * 100)

one.year.results <- rental.one.year %>%
  filter(year(date) == 2022)

one.year.results <- one.year.results %>%
  filter(RegionName %in% c("Daytona Beach, FL", "Miami-Fort Lauderdale, FL", "Lakeland, FL", "New York, NY",
                           "North Port-Sarasota-Bradenton, FL", "Syracuse, NY", "Tulsa, OK", "McAllen, TX"))

The resulting data frame looks like this:

> as.tibble(one.year.results)
# A tibble: 8 x 5
  RegionName                        date       value prev_rent pct.chg
  <chr>                             <date>     <dbl>     <dbl>   <dbl>
1 New York, NY                      2022-03-01  2934      2804   4.64 
2 Miami-Fort Lauderdale, FL         2022-03-01  2832      2699   4.93 
3 Tulsa, OK                         2022-03-01  1286      1294  -0.618
4 McAllen, TX                       2022-03-01  1017      1020  -0.294
5 North Port-Sarasota-Bradenton, FL 2022-03-01  2402      2488  -3.46 
6 Syracuse, NY                      2022-03-01  1318      1334  -1.20 
7 Lakeland, FL                      2022-03-01  1808      1725   4.81 
8 Daytona Beach, FL                 2022-03-01  1766      1680   5.12 

As for the plotting, this is what I am working with so far but I cannot figure out how to get the bars "aligned" like in the example above so that the metro area with the biggest decrease (North Port-Sarasota, FL) is aligned with the metro with the largest increase (Daytona Beach, FL):

ggplot(data = one.year.results, aes(pct.chg)) +
  geom_bar(data = subset(one.year.results, pct.chg > 0),
           aes(y = RegionName), stat = "identity") +
  geom_bar(data = subset(one.year.results, pct.chg < 0),
           aes(y = RegionName), stat = "identity")

As well, here is the data in the reproducible form:

structure(list(RegionName = c("New York, NY", "Miami-Fort Lauderdale, FL", 
"Tulsa, OK", "McAllen, TX", "North Port-Sarasota-Bradenton, FL", 
"Syracuse, NY", "Lakeland, FL", "Daytona Beach, FL"), date = structure(c(19052, 
19052, 19052, 19052, 19052, 19052, 19052, 19052), class = "Date"), 
    value = c(2934, 2832, 1286, 1017, 2402, 1318, 1808, 1766), 
    prev_rent = c(2804, 2699, 1294, 1020, 2488, 1334, 1725, 1680
    ), pct.chg = c(4.63623395149786, 4.92775101889589, -0.618238021638329, 
    -0.294117647058822, -3.45659163987139, -1.19940029985007, 
    4.81159420289856, 5.11904761904762)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -8L), groups = structure(list(
    RegionName = c("Daytona Beach, FL", "Lakeland, FL", "McAllen, TX", 
    "Miami-Fort Lauderdale, FL", "New York, NY", "North Port-Sarasota-Bradenton, FL", 
    "Syracuse, NY", "Tulsa, OK"), .rows = structure(list(8L, 
        7L, 4L, 2L, 1L, 5L, 6L, 3L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -8L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

Solution

  • library(tidyverse)
    
    data <- structure(list(
      RegionName = c(
        "New York, NY", "Miami-Fort Lauderdale, FL",
        "Tulsa, OK", "McAllen, TX", "North Port-Sarasota-Bradenton, FL",
        "Syracuse, NY", "Lakeland, FL", "Daytona Beach, FL"
      ), date = structure(c(
        19052,
        19052, 19052, 19052, 19052, 19052, 19052, 19052
      ), class = "Date"),
      value = c(2934, 2832, 1286, 1017, 2402, 1318, 1808, 1766),
      prev_rent = c(2804, 2699, 1294, 1020, 2488, 1334, 1725, 1680), pct.chg = c(
        4.63623395149786, 4.92775101889589, -0.618238021638329,
        -0.294117647058822, -3.45659163987139, -1.19940029985007,
        4.81159420289856, 5.11904761904762
      )
    ), class = c(
      "grouped_df",
      "tbl_df", "tbl", "data.frame"
    ), row.names = c(NA, -8L), groups = structure(list(
      RegionName = c(
        "Daytona Beach, FL", "Lakeland, FL", "McAllen, TX",
        "Miami-Fort Lauderdale, FL", "New York, NY", "North Port-Sarasota-Bradenton, FL",
        "Syracuse, NY", "Tulsa, OK"
      ), .rows = structure(list(
        8L,
        7L, 4L, 2L, 1L, 5L, 6L, 3L
      ), ptype = integer(0), class = c(
        "vctrs_list_of",
        "vctrs_vctr", "list"
      ))
    ), row.names = c(NA, -8L), class = c(
      "tbl_df",
      "tbl", "data.frame"
    ), .drop = TRUE))
    
    data %>%
      group_by(sign(pct.chg)) %>%
      arrange(-abs(pct.chg)) %>%
      slice(1:3) %>%
      mutate(position = row_number()) %>%
      ggplot(aes(position, pct.chg)) +
      geom_col() +
      geom_label(aes(label = RegionName)) +
      geom_hline(yintercept = 0) +
      coord_flip()
    

    Created on 2022-04-28 by the reprex package (v2.0.0)