Search code examples
Tags: r, time-series, forecasting, autoregressive-models

Multiple VAR forecast with optimal lags


I would like to run the code below automatically for 72 columns (the example code below contains only 3 columns).

library(fpp2)
library(vars)    

# Make up some time series (log returns of slightly shifted copies of `dj`)
dj1 <- diff(log(dj))
dj2 <- diff(log(dj + 2))
dj3 <- diff(log(dj + 7))

### Actual problem
dataframe <- as.data.frame(cbind(dj1, dj2, dj3)) # My data are organized like this
exog <- diff(log(dj - 50)) # here I have an exogenous variable
exog2 <- diff(log(dj + 500)) # another exogenous variable

# One VAR per data-frame column, each combined with the same two extra series.
# The lag order `p` is the first entry of VARselect()$selection, computed from
# the series that belongs to that model. (Previously `b` and `c` reused the
# lag order selected for dj1.)
# The cbind() calls are written out explicitly so that the first series keeps
# the name derived from `dataframe$djN` in the forecast output.
a <- VAR(
  cbind(dataframe$dj1, exog, exog2),
  p = as.integer(
    VARselect(dataframe$dj1, lag.max = 8, type = "const")$selection[1]
  )
)
b <- VAR(
  cbind(dataframe$dj2, exog, exog2),
  p = as.integer(
    VARselect(dataframe$dj2, lag.max = 8, type = "const")$selection[1]
  )
)
c <- VAR(
  cbind(dataframe$dj3, exog, exog2),
  p = as.integer(
    VARselect(dataframe$dj3, lag.max = 8, type = "const")$selection[1]
  )
)

# Five-step-ahead forecasts for every series in each model
af <- forecast(a, h = 5)
bf <- forecast(b, h = 5)
cf <- forecast(c, h = 5)

So as output I get:

> forecast(a, h=5)
dataframe.dj1
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -4.704020e-04 -0.008111334 0.007170530 -0.01215620 0.01121539
294   4.316116e-05 -0.007670049 0.007756371 -0.01175318 0.01183950
295   1.787726e-04 -0.007535208 0.007892754 -0.01161874 0.01197629
296   1.886693e-04 -0.007525322 0.007902661 -0.01160886 0.01198620
297   1.902704e-04 -0.007523721 0.007904262 -0.01160726 0.01198780

exog
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -4.769460e-04 -0.008220712 0.007266820 -0.01232001 0.01136612
294   4.375474e-05 -0.007773280 0.007860789 -0.01191137 0.01199888
295   1.811888e-04 -0.007636627 0.007999005 -0.01177513 0.01213751
296   1.912196e-04 -0.007626607 0.008009046 -0.01176511 0.01214755
297   1.928423e-04 -0.007624984 0.008010669 -0.01176349 0.01214918

exog2
    Point Forecast        Lo 80       Hi 80       Lo 95       Hi 95
293  -4.136372e-04 -0.007158961 0.006331687 -0.01072972 0.009902446
294   3.800451e-05 -0.006771035 0.006847044 -0.01037552 0.010451532
295   1.577389e-04 -0.006651981 0.006967459 -0.01025683 0.010572307
296   1.664683e-04 -0.006643261 0.006976198 -0.01024811 0.010581051
297   1.678812e-04 -0.006641848 0.006977611 -0.01024670 0.010582464

> forecast(b, h=5)
dataframe.dj2
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -0.0004701598 -0.008107034 0.007166714 -0.01214975 0.01120943
294   0.0000431479 -0.007665956 0.007752252 -0.01174691 0.01183321
295   0.0001786786 -0.007531196 0.007888553 -0.01161256 0.01196991
296   0.0001885686 -0.007521317 0.007898454 -0.01160268 0.01197982
297   0.0001901687 -0.007519717 0.007900054 -0.01160108 0.01198142

exog
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -4.769620e-04 -0.008220727 0.007266803 -0.01232003 0.01136610
294   4.376503e-05 -0.007773248 0.007860778 -0.01191132 0.01199885
295   1.811901e-04 -0.007636604 0.007998984 -0.01177509 0.01213747
296   1.912195e-04 -0.007626585 0.008009024 -0.01176508 0.01214752
297   1.928420e-04 -0.007624963 0.008010647 -0.01176346 0.01214914

exog2
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -4.136512e-04 -0.007158974 0.006331672 -0.01072973 0.00990243
294   3.801346e-05 -0.006771020 0.006847047 -0.01037550 0.01045153
295   1.577400e-04 -0.006651974 0.006967454 -0.01025682 0.01057230
296   1.664682e-04 -0.006643255 0.006976191 -0.01024810 0.01058104
297   1.678810e-04 -0.006641842 0.006977604 -0.01024669 0.01058245

> forecast(c, h=5)
dataframe.dj3
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -4.695554e-04 -0.008096305 0.007157194 -0.01213366 0.01119455
294   4.306557e-05 -0.007655818 0.007741949 -0.01173136 0.01181749
295   1.784375e-04 -0.007521216 0.007878091 -0.01159717 0.01195404
296   1.883171e-04 -0.007511346 0.007887981 -0.01158730 0.01196394
297   1.899153e-04 -0.007509748 0.007889579 -0.01158570 0.01196553

exog
    Point Forecast        Lo 80       Hi 80       Lo 95      Hi 95
293  -4.770022e-04 -0.008220765 0.007266761 -0.01232007 0.01136606
294   4.374075e-05 -0.007773287 0.007860768 -0.01191137 0.01199885
295   1.811867e-04 -0.007636622 0.007998996 -0.01177512 0.01213749
296   1.912190e-04 -0.007626600 0.008009038 -0.01176510 0.01214754
297   1.928418e-04 -0.007624978 0.008010661 -0.01176348 0.01214916

exog2
    Point Forecast        Lo 80       Hi 80       Lo 95       Hi 95
293  -4.136862e-04 -0.007159007 0.006331635 -0.01072976 0.009902392
294   3.799233e-05 -0.006771045 0.006847030 -0.01037553 0.010451517
295   1.577371e-04 -0.006651981 0.006967455 -0.01025683 0.010572302
296   1.664678e-04 -0.006643259 0.006976195 -0.01024811 0.010581047
297   1.678808e-04 -0.006641847 0.006977608 -0.01024670 0.010582460

But I am only interested in the equations whose dependent variable comes from the data frame (in the example: dataframe.dj1, dataframe.dj2 and dataframe.dj3), not in the forecasts of exog and exog2. In the end I would like to have a matrix or data frame that contains only the point forecasts of those data-frame variables.

Like this:

> data.frame(as.data.frame(af)[1:5,3],
as.data.frame(bf)[1:5,3],
as.data.frame(cf)[1:5,3])

  as.data.frame.af..1.5..3. as.data.frame.bf..1.5..3. as.data.frame.cf..1.5..3.
1             -4.704020e-04             -0.0004701598             -4.695554e-04
2              4.316116e-05              0.0000431479              4.306557e-05
3              1.787726e-04              0.0001786786              1.784375e-04
4              1.886693e-04              0.0001885686              1.883171e-04
5              1.902704e-04              0.0001901687              1.899153e-04

Solution

  • Special thanks to Frank, who gave a lot of advice in the Stack Overflow chat.

    One possible solution:

    library(fpp2)
    library(vars) # VAR() comes from vars; attaching fpp2 alone does not provide it

    # Make up some time series
    dj1 <- diff(log(dj))
    dj2 <- diff(log(dj + 2))
    dj3 <- diff(log(dj + 7))

    ### Actual problem
    dataframe <- as.data.frame(cbind(dj1, dj2, dj3)) # My data are organized like this
    exog <- diff(log(dj - 50)) # here I have an exogenous variable
    exog2 <- diff(log(dj + 500)) # another exogenous variable

    # Forecast horizon, stated once so that the forecast() call and the row
    # selection at the end cannot drift apart.
    h <- 10

    # Fit one VAR per column of `dataframe`; inside each model the column is
    # called `v`. No `p` is passed, so VAR() uses its default lag order.
    vars <- lapply(dataframe, function(v) VAR(cbind(v, exog, exog2)))
    fcasts <- lapply(vars, forecast, h = h)

    # Convert the list of forecasts to one wide data frame: every model
    # contributes a group of 7 columns, the 3rd of which is the point forecast.
    res <- data.frame(fcasts)

    # Keep the first `h` rows (the forecasts for `v`, i.e. the data-frame
    # column) and only the point-forecast column of each model.
    resfinal <- res[seq_len(h), seq(3, ncol(res), by = 7)]
    resfinal
    

    By the way: Frank mentioned that it should be possible to solve this problem without lapply/list, but I could not figure out how.