Search code examples
rdataframeggplot2time-seriesforecasting

overlapping the predicted time series on the original series in R


I perform forecast

w=read.csv("C:/Users/admin/Documents/aggrmonth.csv", sep=";",dec=",")
w
#create time series object
w=ts(w$new,frequency = 12,start=c(2015,1)) 
w
#timeplot
plot.ts(w)

#forecast for the next months
library("forecast")
m <- stats::HoltWinters(w)
test=forecast:::forecast.HoltWinters(m,h=4) #h is how much month do you want to predict
test

now i want get forecast for 4 months ahead. From 01.2017-04.2017. I this know original values.

1-Jan-17    1020
1-Feb-17    800
1-Mar-17    1130
1-Apr-17    600

But i need get plot where displayed predicted values with CI are overlapped with original value. Of course if i don't clearly exlplain, i attached the plot. needed plot The green curve is the initial value of the series(my 4 months) and green dotted line is predictied values are overlapped on original values. Dashes on the predicted dotted curve are confidence intervals.

How to create such plot

w=

structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
"new"), class = "data.frame", row.names = c(NA, -24L))

Solution

  • We can use ggfortify to create a data frame then plot both timeseries with ggplot2

    # Load required libraries
    library(lubridate)
    library(magrittr)
    library(tidyverse)
    library(scales)
    library(forecast)
    library(ggfortify)
    
    w <- structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
      11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
      4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
      "1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
      "1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
      "1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
      "1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
      8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
      14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
      2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
      "new"), class = "data.frame", row.names = c(NA, -24L))
    
    # create time series object
    w = ts(w$new, frequency = 12, start=c(2015, 1)) 
    w
    
    #>        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
    #> 2015  8575  8215 16399 16415 15704 19805 17484 18116 19977 14439  9258
    #> 2016  4909  9539  8802 11253 11971  7838  2095  4157  3910  1306  3429
    #>        Dec
    #> 2015 12259
    #> 2016  1390
    
    # forecast for the next months
    m <- stats::HoltWinters(w)
    
    # h is how much month do you want to predict
    pred = forecast:::forecast.HoltWinters(m, h=4) 
    pred
    
    #>          Point Forecast     Lo 80     Hi 80      Lo 95     Hi 95
    #> Jan 2017    -5049.00381 -9644.003 -454.0045 -12076.449  1978.441
    #> Feb 2017       37.44605 -5599.592 5674.4843  -8583.660  8658.552
    #> Mar 2017     -256.41474 -6770.890 6258.0601 -10219.444  9706.615
    #> Apr 2017     2593.09445 -4693.919 9880.1079  -8551.431 13737.620
    
    # plot
    plot(pred, include = 24, showgap = FALSE)
    

    # Convert pred from list to data frame object
    df1 <- fortify(pred) %>% as_tibble()
    
    # Create Date column, remove Index column and rename other columns 
    df1 %<>% 
      mutate(Date = as.Date(Index, "%Y-%m-%d")) %>% 
      select(-Index) %>% 
      rename("Low95" = "Lo 95",
             "Low80" = "Lo 80",
             "High95" = "Hi 95",
             "High80" = "Hi 80",
             "Forecast" = "Point Forecast")
    df1
    
    #> # A tibble: 28 x 8
    #>     Data Fitted Forecast Low80 High80 Low95 High95 Date      
    #>    <int>  <dbl>    <dbl> <dbl>  <dbl> <dbl>  <dbl> <date>    
    #>  1  8575     NA       NA    NA     NA    NA     NA 2015-01-01
    #>  2  8215     NA       NA    NA     NA    NA     NA 2015-02-01
    #>  3 16399     NA       NA    NA     NA    NA     NA 2015-03-01
    #>  4 16415     NA       NA    NA     NA    NA     NA 2015-04-01
    #>  5 15704     NA       NA    NA     NA    NA     NA 2015-05-01
    #>  6 19805     NA       NA    NA     NA    NA     NA 2015-06-01
    #>  7 17484     NA       NA    NA     NA    NA     NA 2015-07-01
    #>  8 18116     NA       NA    NA     NA    NA     NA 2015-08-01
    #>  9 19977     NA       NA    NA     NA    NA     NA 2015-09-01
    #> 10 14439     NA       NA    NA     NA    NA     NA 2015-10-01
    #> # ... with 18 more rows
    
    ### Avoid the gap between data and forcast
    # Find the last non missing NA values in obs then use that
    # one to initialize all forecast columns
    lastNonNAinData <- max(which(complete.cases(df1$Data)))
    df1[lastNonNAinData, 
        !(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]
    
    ggplot(df1, aes(x = Date)) + 
      geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
      geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
      geom_point(aes(y = Data, colour = "Data"), size = 4) +
      geom_line(aes(y = Data, group = 1, colour = "Data"), 
                linetype = "dotted", size = 0.75) +
      geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
      geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
      scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
      scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
      scale_fill_brewer(name = "Intervals") +
      guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
      theme_bw(base_size = 14)
    

    Edit: To included known values from "2017-01-01" to "2017-04-01"

    # Create new column which has known values
    df1$Obs <- NA
    df1$Obs[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)
    
    ggplot(df1, aes(x = Date)) + 
      geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
      geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
      geom_point(aes(y = Data, colour = "Data"), size = 4) +
      geom_line(aes(y = Data, group = 1, colour = "Data"), 
                linetype = "dotted", size = 0.75) +
      geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
      geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
      scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
      scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
      scale_fill_brewer(name = "Intervals") +
      guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
      theme_bw(base_size = 14) +
      geom_line(aes(y = Obs, group = 4, colour = "Obs"), linetype = "dotted", size = 0.75)
    

    Or put those values directly into Data column

    df1$Data[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)
    
    ggplot(df1, aes(x = Date)) + 
      geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
      geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
      geom_point(aes(y = Data, colour = "Data"), size = 3) +
      geom_line(aes(y = Data, group = 1, colour = "Data"), 
                linetype = "dotted", size = 0.75) +
      geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
      geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
      scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
      scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
      scale_fill_brewer(name = "Intervals") +
      guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
      theme_bw(base_size = 14)
    

    Created on 2018-04-21 by the reprex package (v0.2.0).