Search code examples
rdataframeforecasting

Efficient Way of Calculating 3 Month or 6 Month moving Average in all the Columns instead of Loop in R


I wanted to know if there is a faster way of calculating moving average of 3 months or 6 months to each and every column in a data frame for the next 9 months.

I currently have about 7k columns so my current approach is using a for loop to calculate each and every column and as you can assume it is very time consuming.

I wanted to know if there is any other alternate methods? Is there something like parallel processing or anything similar which would get the desired output.

I am still beginner in R and I am not sure if there is any. I would definitely appreciate any other alternative methods and how would I be able to code them?

The data frame I am working with is given below:

structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01", 
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01", 
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01", 
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01", 
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01", 
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01", 
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01", 
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01", 
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|703` = c(0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1636, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), `XYZ|851` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 206, 1814, 
2324, 772, 1116, 1636, 1906, 957, 829, 911, 786, 938, 1313, 2384, 
1554, 1777, 1635, 1534, 1015, 827, 982, 685, 767, 511, 239, 5400, 
1301, 426, 261, 201, 33, 0, 0, 0, 0, 0, 0), `XYZ|574` = c(0, 
0, 0, 0, 0, 0, 0, 0, 74, 179, 464, 880, 324, 184, 90, 170, 140, 
96, 78, 83, 83, 121, 245, 9000, 332, 123, 117, 138, 20, 42, 70, 
70, 42, 103, 490, 7500, 488, 245, 142, 95, 63, 343, 57, 113, 
100, 105), `XYZ|300` = c(82, 67, 140, 118, 24, 23, 93, 27, 63, 
62, 76, 50625, 56, 65, 55, 136, 99, 53, 5, 137, 111, 126, 208, 
50625, 61, 42, 225, 206, 69, 116, 82, 59, 51, 30, 63, 50625, 
40, 39, 96, 92, 101, 77, 43, 0, 0, 0), `XYZ|513` = c(74, 33, 
21, 31, 31, 29, 41, 21, 25, 60, 66, 5476, 47, 25, 23, 17, 34, 
0, 0, 0, 0, 0, 0, 5476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5476, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `XYZ|204` = c(13, 41, 65, 107, 
42, 32, 79, 79, 44, 84, 79, 23409, 79, 36, 30, 51, 48, 51, 25, 
39, 15, 21, 65, 23409, 48, 40, 130, 94, 90, 92, 41, 59, 35, 4, 
83, 23409, 73, 38, 39, 28, 60, 71, 17, 51, 37, 26), `XYZ|895` = c(2, 
7, 26, 22, 10, 28, 13, 23, 18, 19, 30, 2209, 13, 6, 24, 20, 10, 
14, 23, 6, 21, 16, 32, 2209, 14, 14, 21, 27, 23, 45, 8, 12, 15, 
8, 29, 2209, 6, 1, 8, 10, 7, 2, 9, 8, 6, 13), `XYZ|666` = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22500, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 150, 22500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22500, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), `XYZ|575` = c(0, 0, 179, 464, 880, 
324, 184, 0, 0, 0, 0, 774400, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
774400, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 490, 774400, 488, 245, 
142, 0, 0, 0, 0, 0, 0, 0), `XYZ|345` = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -46L))

The code which I am currently using is given below:

Book7 <- read_excel("C:/X/X/X- X/X/Book7.xlsx", 
                   sheet = "Loop")

Output = data.frame(matrix(NA,nrow = 9,ncol = ncol(Book7)))
colnames(Output) <- colnames(Book7)


for(i in 2:ncol(Book7))
{
  Y<- ts(data = Book7[,i],
         frequency = 12,
         start = c(2019,1),
         end = c(2022,10))
  
  Print(Y)
  
  mov_avg <- ma(Y, order = 3, centre = FALSE)
  
  Forecast_Mov_Avg_3M <- forecast(mov_avg,h = 19)
  
  print(Forecast_Mov_Avg_3M$mean)
  
  Output[,i] = as.data.frame(Forecast_Mov_Avg_3M$mean)
  
}
 

Solution

  • You can try parallel processing with {furrr}:

    library(forecast)
    library(purrr)
    library(furrr)
    library(dplyr)
    
    forecast_s <- function(s){
        ts(data = s,frequency = 12,start = c(2019,1), end = c(2022,10)) %>% 
        ma(order = 3, centre = FALSE) %>% 
        forecast(h = 19) %>% 
        getElement("mean")
    }
    
    # execute forecast_s in parallel in 8 sessions 
    # over all Book7 columns starting from 2nd
    plan(multisession, workers = 8)
    ts_list <- future_map(Book7[,-1], forecast_s)
    
    # extract dates from the results of 1st column
    date_val <- zoo::as.Date.ts(ts_list[[1]])
    
    # convert named list of ts objects into named list of vectors, 
    # bind columns to data.frame / tibble, preceded by dates column
    output <- map(ts_list, as.vector) %>%  
      bind_cols(date = date_val, .) 
    

    Result:

    output
    #> # A tibble: 19 × 11
    #>    date       `XYZ|703` `XYZ|851` `XYZ|574`    `XYZ|300` `XYZ|513` `XYZ|204`
    #>    <date>         <dbl>     <dbl>     <dbl>        <dbl>     <dbl>     <dbl>
    #>  1 2022-10-01 2.64e-110  1.15e-15     206.      8.10         7.15       43.5
    #>  2 2022-11-01 2.64e-110  1.15e-15    2134.  16857.        1831.       7839. 
    #>  3 2022-12-01 2.64e-110  1.15e-15    2185.  16853.        1831.       7849. 
    #>  4 2023-01-01 2.64e-110  1.15e-15    2138.  16831.        1829.       7837. 
    #>  5 2023-02-01 2.64e-110  1.15e-15     176.      1.11        10.0        55.8
    #>  6 2023-03-01 2.64e-110  1.15e-15     162.     28.5          3.11       63.9
    #>  7 2023-04-01 2.64e-110  1.15e-15     209.     43.7          2.97       75.7
    #>  8 2023-05-01 2.64e-110  1.15e-15     153.     22.7          2.46       73.0
    #>  9 2023-06-01 2.64e-110  1.15e-15      32.5    -7.47         1.03       60.7
    #> 10 2023-07-01 2.64e-110  1.15e-15      29.4   -11.4         -2.75       54.0
    #> 11 2023-08-01 2.64e-110  1.15e-15      85.2    -4.93        -2.04       42.0
    #> 12 2023-09-01 2.64e-110  1.15e-15     106.      0.000499     0.165      38.0
    #> 13 2023-10-01 2.64e-110  1.15e-15     206.      8.10         7.15       43.5
    #> 14 2023-11-01 2.64e-110  1.15e-15    2134.  16857.        1831.       7839. 
    #> 15 2023-12-01 2.64e-110  1.15e-15    2185.  16853.        1831.       7849. 
    #> 16 2024-01-01 2.64e-110  1.15e-15    2138.  16831.        1829.       7837. 
    #> 17 2024-02-01 2.64e-110  1.15e-15     176.      1.11        10.0        55.8
    #> 18 2024-03-01 2.64e-110  1.15e-15     162.     28.5          3.11       63.9
    #> 19 2024-04-01 2.64e-110  1.15e-15     209.     43.7          2.97       75.7
    #> # ℹ 4 more variables: `XYZ|895` <dbl>, `XYZ|666` <dbl>, `XYZ|575` <dbl>,
    #> #   `XYZ|345` <dbl>
    

    Created on 2023-08-31 with reprex v2.0.2