Search code examples
r ggplot2 bar-chart geom-bar

Compare the sales of 2015 and 2016 with Tier and Region from the following dataframe in R


Dataframe

Output to be displayed

I have written this code, but I am not getting the desired output shown above:

# Attempted plot: total 2015 and 2016 sales per Region/Tier as dodged bars.
dataframe%>%
group_by(Region,Tier)%>%
# summarise() creates TotalSales2015 and TotalSales2016 (alongside the
# grouping columns); the data stays in wide format, one column per year.
summarise(TotalSales2015=sum(Sales2015),TotalSales2016=sum(Sales2016))%>%
# NOTE(review): `Sales` and `Year` are not columns of the summarised data,
# so these aesthetics cannot be resolved. The data would have to be reshaped
# to long format (one row per Region/Tier/Year) before plotting.
ggplot(aes(x=Tier, y=Sales, fill=Year)) +
geom_bar(stat="identity", position=position_dodge())+
geom_text(aes(label=Sales))

dput(dataframe)

    structure(list(AccountId = c(1116L, 1116L, 2391L, 2391L, 2397L, 
    2400L, 2400L, 2404L, 2406L, 2408L), AccountName = c("Account1", 
    "Account1", "Account2", "Account2", "Account3", "Account4", "Account4", 
    "Account5", "Account6", "Account7"), Region = c("West", "West", 
    "East", "East", "East", "East", "East", "East", "East", "East"
    ), Division = c("DIAMONDBACK", "DIAMONDBACK", "MINUTEMEN", "MINUTEMEN", 
    "MINUTEMEN", "MINUTEMEN", "MINUTEMEN", "EMPIRE", "BIG APPLE", 
    "BIG APPLE"), City = c("PHOENIX W", "PHOENIX W", "HARTFORD", 
    "HARTFORD", "WORCESTER", "PORTLAND", "PORTLAND", "BRIDGEPORT", 
    "JERSEY CITY", "JERSEY CITY"), State = c("AZ", "AZ", "CT", "CT", 
    "MA", "ME", "ME", "CT", "NJ", "NJ"), Tier = c("Low", "Low", "Med", 
    "Med", "Med", "High", "High", "Low", "Med", "High"), Month = c("Aug", 
    "Oct", "Jun", "Mar", "Sep", "Jul", "Feb", "Mar", "Mar", "Aug"
    ), Sales2015 = c(0, 10500.78, 0, 19881, 3684.48, 0, 2631.31, 
    4153.89, 0, 0), Sales2016 = c(13208.52, 23114.91, 6627, 13254, 
    0, 10525.24, 42812.62, 3918.77, 6951.86, 10994.54), Units2015 = c(0, 
    3, 0, 9, 1, 0, 1, 1, 0, 0), Units2016 = c(4, 7, 3, 6, 0, 4, 17, 
    1, 2, 4), TargetAchevied2015 = c(0.7, 0.84, 1.15, 1.33, 1.02, 
    1.03, 1.08, 0.79, 1.12, 1.11), TargetAchevied2016 = c(1.53, 1.31, 
    1.29, 1.17, 1.53, 1.45, 0.99, 1.46, 1.02, 1.54)), row.names = c(NA, 
    10L), class = "data.frame")

Solution

  • With the tidyverse packages dplyr, tidyr, scales and ggplot2 you could try this:

    library(dplyr)
    library(tidyr)
    library(ggplot2)
    
    # Reshape the wide per-year columns (Sales2015, Units2016, ...) into long
    # format and total the sales per Region / Tier / Year.
    #
    # The ".value" sentinel makes pivot_longer() keep the metric part of each
    # column name ("Sales", "Units", "TargetAchevied") as its own column while
    # the four-digit suffix becomes `Year`. This replaces a pivot_longer() +
    # pivot_wider() round trip, which would produce list-columns if the id
    # columns ever failed to identify rows uniquely.
    df <-
      dataframe %>%
      pivot_longer(
        # Only columns ending in a four-digit year; contains("20") would also
        # pick up any unrelated column that happens to contain "20".
        cols = matches("\\d{4}$"),
        names_to = c(".value", "Year"),
        names_pattern = "(\\D+)(\\d{4})"
      ) %>%
      group_by(Region, Tier, Year) %>%
      # Drop the grouping so `df` is a plain tibble and no regrouping message
      # is emitted.
      summarise(Sales = sum(Sales), .groups = "drop")

    # Bars and labels must share the same dodge width so that every label
    # sits above its own bar.
    dodge <- position_dodge(width = 0.9)

    # Dodged bars per Tier, one bar per Year, one panel per Region.
    ggplot(df, aes(x = Tier, y = Sales, fill = Year)) +
      geom_col(position = dodge) +
      # Vertical labels starting at the top of each bar.
      geom_text(
        aes(label = scales::comma(Sales)),
        position = dodge,
        angle = 90,
        hjust = 0
      ) +
      # Extra headroom above the bars so the rotated labels are not clipped.
      scale_y_continuous(expand = expansion(mult = c(0.06, 0.2))) +
      labs(fill = "Total sales") +
      facet_wrap(~Region, nrow = 1)
    

    Created on 2021-08-29 by the reprex package (v2.0.0)

    data

    # Sample data (dput() output): 10 account-level rows with Region, Tier and
    # Month, plus Sales, Units and TargetAchevied columns for 2015 and 2016.
    dataframe <- structure(list(AccountId = c(1116L, 1116L, 2391L, 2391L, 2397L, 
    2400L, 2400L, 2404L, 2406L, 2408L), AccountName = c("Account1", 
    "Account1", "Account2", "Account2", "Account3", "Account4", "Account4", 
    "Account5", "Account6", "Account7"), Region = c("West", "West", 
    "East", "East", "East", "East", "East", "East", "East", "East"
    ), Division = c("DIAMONDBACK", "DIAMONDBACK", "MINUTEMEN", "MINUTEMEN", 
    "MINUTEMEN", "MINUTEMEN", "MINUTEMEN", "EMPIRE", "BIG APPLE", 
    "BIG APPLE"), City = c("PHOENIX W", "PHOENIX W", "HARTFORD", 
    "HARTFORD", "WORCESTER", "PORTLAND", "PORTLAND", "BRIDGEPORT", 
    "JERSEY CITY", "JERSEY CITY"), State = c("AZ", "AZ", "CT", "CT", 
    "MA", "ME", "ME", "CT", "NJ", "NJ"), Tier = c("Low", "Low", "Med", 
    "Med", "Med", "High", "High", "Low", "Med", "High"), Month = c("Aug", 
    "Oct", "Jun", "Mar", "Sep", "Jul", "Feb", "Mar", "Mar", "Aug"
    ), Sales2015 = c(0, 10500.78, 0, 19881, 3684.48, 0, 2631.31, 
    4153.89, 0, 0), Sales2016 = c(13208.52, 23114.91, 6627, 13254, 
    0, 10525.24, 42812.62, 3918.77, 6951.86, 10994.54), Units2015 = c(0, 
    3, 0, 9, 1, 0, 1, 1, 0, 0), Units2016 = c(4, 7, 3, 6, 0, 4, 17, 
    1, 2, 4), TargetAchevied2015 = c(0.7, 0.84, 1.15, 1.33, 1.02, 
    1.03, 1.08, 0.79, 1.12, 1.11), TargetAchevied2016 = c(1.53, 1.31, 
    1.29, 1.17, 1.53, 1.45, 0.99, 1.46, 1.02, 1.54)), row.names = c(NA, 
    10L), class = "data.frame")