I wanted to know if there is a faster way of calculating moving average of 3 months or 6 months to each and every column in a data frame for the next 9 months.
I currently have about 7k columns so my current approach is using a for loop to calculate each and every column and as you can assume it is very time consuming.
I wanted to know if there is any other alternate methods? Is there something like parallel processing or anything similar which would get the desired output.
I am still beginner in R and I am not sure if there is any. I would definitely appreciate any other alternative methods and how would I be able to code them?
The data frame I am working with is given below:
structure(list(`Row Labels` = c("2019-01-01", "2019-02-01", "2019-03-01",
"2019-04-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01",
"2019-09-01", "2019-10-01", "2019-11-01", "2019-12-01", "2020-01-01",
"2020-02-01", "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
"2020-07-01", "2020-08-01", "2020-09-01", "2020-10-01", "2020-11-01",
"2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01", "2021-04-01",
"2021-05-01", "2021-06-01", "2021-07-01", "2021-08-01", "2021-09-01",
"2021-10-01", "2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01",
"2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", "2022-07-01",
"2022-08-01", "2022-09-01", "2022-10-01"), `XYZ|703` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1636, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), `XYZ|851` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 206, 1814,
2324, 772, 1116, 1636, 1906, 957, 829, 911, 786, 938, 1313, 2384,
1554, 1777, 1635, 1534, 1015, 827, 982, 685, 767, 511, 239, 5400,
1301, 426, 261, 201, 33, 0, 0, 0, 0, 0, 0), `XYZ|574` = c(0,
0, 0, 0, 0, 0, 0, 0, 74, 179, 464, 880, 324, 184, 90, 170, 140,
96, 78, 83, 83, 121, 245, 9000, 332, 123, 117, 138, 20, 42, 70,
70, 42, 103, 490, 7500, 488, 245, 142, 95, 63, 343, 57, 113,
100, 105), `XYZ|300` = c(82, 67, 140, 118, 24, 23, 93, 27, 63,
62, 76, 50625, 56, 65, 55, 136, 99, 53, 5, 137, 111, 126, 208,
50625, 61, 42, 225, 206, 69, 116, 82, 59, 51, 30, 63, 50625,
40, 39, 96, 92, 101, 77, 43, 0, 0, 0), `XYZ|513` = c(74, 33,
21, 31, 31, 29, 41, 21, 25, 60, 66, 5476, 47, 25, 23, 17, 34,
0, 0, 0, 0, 0, 0, 5476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5476,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `XYZ|204` = c(13, 41, 65, 107,
42, 32, 79, 79, 44, 84, 79, 23409, 79, 36, 30, 51, 48, 51, 25,
39, 15, 21, 65, 23409, 48, 40, 130, 94, 90, 92, 41, 59, 35, 4,
83, 23409, 73, 38, 39, 28, 60, 71, 17, 51, 37, 26), `XYZ|895` = c(2,
7, 26, 22, 10, 28, 13, 23, 18, 19, 30, 2209, 13, 6, 24, 20, 10,
14, 23, 6, 21, 16, 32, 2209, 14, 14, 21, 27, 23, 45, 8, 12, 15,
8, 29, 2209, 6, 1, 8, 10, 7, 2, 9, 8, 6, 13), `XYZ|666` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22500, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 150, 22500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22500, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), `XYZ|575` = c(0, 0, 179, 464, 880,
324, 184, 0, 0, 0, 0, 774400, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
774400, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 490, 774400, 488, 245,
142, 0, 0, 0, 0, 0, 0, 0), `XYZ|345` = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -46L))
The code which I am currently using is given below:
Book7 <- read_excel("C:/X/X/X- X/X/Book7.xlsx",
sheet = "Loop")
Output = data.frame(matrix(NA,nrow = 9,ncol = ncol(Book7)))
colnames(Output) <- colnames(Book7)
for(i in 2:ncol(Book7))
{
Y<- ts(data = Book7[,i],
frequency = 12,
start = c(2019,1),
end = c(2022,10))
Print(Y)
mov_avg <- ma(Y, order = 3, centre = FALSE)
Forecast_Mov_Avg_3M <- forecast(mov_avg,h = 19)
print(Forecast_Mov_Avg_3M$mean)
Output[,i] = as.data.frame(Forecast_Mov_Avg_3M$mean)
}
You can try parallel processing with {furrr}
:
library(forecast)
library(purrr)
library(furrr)
library(dplyr)
forecast_s <- function(s){
ts(data = s,frequency = 12,start = c(2019,1), end = c(2022,10)) %>%
ma(order = 3, centre = FALSE) %>%
forecast(h = 19) %>%
getElement("mean")
}
# execute forecast_s in parallel in 8 sessions
# over all Book7 columns starting from 2nd
plan(multisession, workers = 8)
ts_list <- future_map(Book7[,-1], forecast_s)
# extract dates from the results of 1st column
date_val <- zoo::as.Date.ts(ts_list[[1]])
# convert named list of ts objects into named list of vectors,
# bind columns to data.frame / tibble, preceded by dates column
output <- map(ts_list, as.vector) %>%
bind_cols(date = date_val, .)
Result:
output
#> # A tibble: 19 × 11
#> date `XYZ|703` `XYZ|851` `XYZ|574` `XYZ|300` `XYZ|513` `XYZ|204`
#> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2022-10-01 2.64e-110 1.15e-15 206. 8.10 7.15 43.5
#> 2 2022-11-01 2.64e-110 1.15e-15 2134. 16857. 1831. 7839.
#> 3 2022-12-01 2.64e-110 1.15e-15 2185. 16853. 1831. 7849.
#> 4 2023-01-01 2.64e-110 1.15e-15 2138. 16831. 1829. 7837.
#> 5 2023-02-01 2.64e-110 1.15e-15 176. 1.11 10.0 55.8
#> 6 2023-03-01 2.64e-110 1.15e-15 162. 28.5 3.11 63.9
#> 7 2023-04-01 2.64e-110 1.15e-15 209. 43.7 2.97 75.7
#> 8 2023-05-01 2.64e-110 1.15e-15 153. 22.7 2.46 73.0
#> 9 2023-06-01 2.64e-110 1.15e-15 32.5 -7.47 1.03 60.7
#> 10 2023-07-01 2.64e-110 1.15e-15 29.4 -11.4 -2.75 54.0
#> 11 2023-08-01 2.64e-110 1.15e-15 85.2 -4.93 -2.04 42.0
#> 12 2023-09-01 2.64e-110 1.15e-15 106. 0.000499 0.165 38.0
#> 13 2023-10-01 2.64e-110 1.15e-15 206. 8.10 7.15 43.5
#> 14 2023-11-01 2.64e-110 1.15e-15 2134. 16857. 1831. 7839.
#> 15 2023-12-01 2.64e-110 1.15e-15 2185. 16853. 1831. 7849.
#> 16 2024-01-01 2.64e-110 1.15e-15 2138. 16831. 1829. 7837.
#> 17 2024-02-01 2.64e-110 1.15e-15 176. 1.11 10.0 55.8
#> 18 2024-03-01 2.64e-110 1.15e-15 162. 28.5 3.11 63.9
#> 19 2024-04-01 2.64e-110 1.15e-15 209. 43.7 2.97 75.7
#> # ℹ 4 more variables: `XYZ|895` <dbl>, `XYZ|666` <dbl>, `XYZ|575` <dbl>,
#> # `XYZ|345` <dbl>
Created on 2023-08-31 with reprex v2.0.2