Search code examples
r histogram

How to add line plot to histogram in R


I have a histogram plotted in R with the code shown below. I am trying to do 2 things:

  1. How can I show the percentage [%] above each bar?
  2. How can I add a line plot on top of the existing histogram that shows the cumulative percentage [%] from left to right? For example, in the attached figure the line starts at 12.5%, then adds the next bar (~22.92%) to that 12.5%, so the second point is plotted at ~35.42%. It keeps adding each bar's percentage as it goes from left to right. Is there a way to make a similar line plot on my existing histogram chart in R?

Any help or guidance would be very much appreciated. Thanks!

    library(tidyverse)
    
    # Raw sample values, stored as text (converted to numeric below).
    HoursfromSLA <- c("-100","-100","-100","-100","-100","-100","-100","-100","-100","-100","-100","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-80","-50","-50","-50","-50","-50","-50","-50","-50","-50","-50","-20","-20","-20","-20","-20","-20","-20","-20","-20","-20","20","20","20","20","50","50","50","50","50","50","50","50","75","75","75","75","75","75","100","100","100","100","135","135","135","135","225","225","225","225","310","310","350","350","400","400","500","500","500","500","675","675")
    
    data <- data.frame(HoursfromSLA)
    # Convert to numeric BEFORE classifying. Comparing the character values
    # with 0 would be a string comparison whose result depends on the locale's
    # collating sequence, not a numeric test.
    data$HoursfromSLA <- as.numeric(data$HoursfromSLA)
    # Negative values are labelled "Green", everything else "Red"; these labels
    # are the keys later used to pick the fill colours.
    data$group <- ifelse(data$HoursfromSLA < 0, "Green", "Red")
    
    # Histogram of HoursfromSLA, filled by group, with bar heights expressed as
    # a percentage of all observations and a vertical reference line at x = 0.
    # x is mapped to the HoursfromSLA column at plot level (the original mapped
    # it to the data frame itself, which only worked because the histogram
    # layer overrode it).
    ggplot(data, aes(x = HoursfromSLA, fill = group)) +
      geom_vline(xintercept = 0, colour = "black") +
      geom_histogram(
        # after_stat() replaces the deprecated ..count.. notation; sum(count)
        # runs over every bin, so the bars add up to 100.
        aes(y = after_stat(count / sum(count) * 100)),
        colour = "white", show.legend = FALSE, bins = 25
      ) +
      scale_fill_manual(values = c("Green" = "darkgreen", "Red" = "darkred")) +
      scale_x_continuous(name = "Time to SLA", breaks = seq(-150, 720, 30)) +
      scale_y_continuous(name = "[%]")

histogram with line plot


Solution

  • There might be a better way to do this with binned scales, but you could make a dataframe of the percentages for each column and work with that:

    # Bin HoursfromSLA into fixed-width intervals and label each bin with its
    # midpoint, so the bars can be drawn with geom_col() at known x positions.
    bin_width <- 30
    bin_breaks <- seq(-120, 900, bin_width)
    bin_mids <- head(bin_breaks, -1) + bin_width / 2
    data$HoursfromSLA2 <- as.numeric(as.character(
      cut(data$HoursfromSLA, breaks = bin_breaks, labels = bin_mids)
    ))
    # Count observations per bin and group, then express each count as a
    # percentage of all counted observations.
    data2 <- aggregate(HoursfromSLA ~ HoursfromSLA2 + group, data = data, FUN = length)
    data2$perc <- 100 * data2$HoursfromSLA / sum(data2$HoursfromSLA)
    # Sort by bin position so the running total accumulates left to right
    # regardless of the row order aggregate() returns, and compute it once.
    data2 <- data2[order(data2$HoursfromSLA2), ]
    data2$cum_perc <- cumsum(data2$perc)
    ggplot(data2, aes(x = HoursfromSLA2, y = perc)) +
      geom_col(aes(fill = group), width = bin_width) +
      # Per-bar percentage label just above each bar.
      geom_text(aes(label = round(perc, 1)), vjust = -0.5) +
      # Cumulative percentage line, points and labels, drawn at each bar's
      # left edge (bin midpoint minus half the bin width).
      geom_line(aes(x = HoursfromSLA2 - bin_width / 2, y = cum_perc)) +
      geom_point(aes(x = HoursfromSLA2 - bin_width / 2, y = cum_perc)) +
      geom_text(
        aes(x = HoursfromSLA2 - bin_width / 2, y = cum_perc, label = round(cum_perc, 1)),
        vjust = -1, hjust = 1
      ) +
      geom_vline(xintercept = 0) +
      scale_fill_manual(values = c("Green" = "darkgreen", "Red" = "darkred")) +
      scale_x_continuous(name = "Time to SLA", breaks = seq(-150, 720, 30)) +
      scale_y_continuous(name = "[%]") +
      theme_bw() +
      theme(legend.position = "none")
    

    enter image description here