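I have a time series forecasting application in which the algorithm can be selected from two choices: a SARIMAX model (statsmodels, with the order chosen by pmdarima's auto_arima) or a linear regression (scikit-learn).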
Neither class exposes a parameter for setting the seed, either when the object is initialized or when its fit function is called, so I set the NumPy random seed to a fixed value: np.random.seed(123)
This seed is set globally in the first entry point of the application.
The problem is that the forecasting results differ between machines, even though we run inside a Docker image built from a Pipfile, so all dependencies are pinned to the same versions and the Python version (3.9) is the same as well:
numpy = "==1.22.3"
scikit-learn = "==1.1.1"
statsmodels = "==0.13.2"
scipy = "==1.9.3"
The differences in the forecasted values are large enough to matter for our application, and they occur with both algorithms. Example:
Local (Windows 10) | Azure |
---|---|
8499 | 12693 |
140277 | 41278 |
Can anyone help explain what causes this and how to make the results reproducible?
Update: a code snippet and sample data can be found below:
import pandas as pd
import numpy as np
import pmdarima as pm
from pmdarima.arima.utils import ndiffs
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Seed NumPy's global random state once at import time so that any code
# drawing from it starts from the same state on every run.
np.random.seed(123)
def train_sarima_model(df_historical: pd.DataFrame, *, random_state=123):
    """Pick a seasonal ARIMA order with pmdarima and refit it with SARIMAX.

    The ``"value"`` column of ``df_historical`` is converted to a NumPy array.
    ``pm.auto_arima`` searches over that array (stepwise, ``m=12``, ``D=1``,
    ADF test for ``d``), and the selected ``order`` / ``seasonal_order`` are
    then used to fit a ``SARIMAX`` model with stationarity and invertibility
    enforcement switched off.

    Args:
        df_historical: Frame containing a ``"value"`` column with the
            historical observations, already in chronological order.
        random_state: Forwarded unchanged to ``pm.auto_arima`` as its
            ``random_state`` argument. Defaults to 123, the same value used
            for the module-level ``np.random.seed`` call, so the search no
            longer depends only on the global NumPy state.
            NOTE(review): pmdarima documents this argument as the PRNG for
            its random search; confirm how much it influences the stepwise
            search used here.

    Returns:
        The fitted results object returned by ``SARIMAX(...).fit(disp=0)``.
    """
    historical_data = df_historical["value"].to_numpy()

    # select the best model trained on the historical data
    smodel = pm.auto_arima(
        historical_data,
        start_p=1,
        start_q=1,
        test="adf",
        max_p=3,
        max_q=3,
        m=12,
        start_P=0,
        seasonal=True,
        d=None,
        D=1,
        trace=True,
        error_action="ignore",
        suppress_warnings=True,
        stepwise=True,
        # Pin the search's random state explicitly instead of relying on
        # whatever the global NumPy RNG happens to hold at call time.
        random_state=random_state,
    )

    # Refit with statsmodels using only the orders chosen above; the
    # stationarity/invertibility constraints are deliberately disabled.
    mod = SARIMAX(
        historical_data,
        order=smodel.order,
        seasonal_order=smodel.seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    best_model = mod.fit(disp=0)
    return best_model
# Keep only the date/value columns, order the rows chronologically and
# index them by date before handing the frame to the model trainer.
df_forecasting = (
    df[["date", "value"]]
    .sort_values(by=["date"], ascending=True)
    .set_index("date")
)

# Fit the model on the prepared history, then materialise the forecast for
# the requested number of steps as a plain list.
best_model = train_sarima_model(df_forecasting)
forecasting_values = list(best_model.forecast(steps=forecasting_period))
{
"balance": [
{
"date": "2020-07-21",
"value": 0.0
},
{
"date": "2020-07-22",
"value": -3799.2
},
{
"date": "2020-07-23",
"value": -3799.2
},
{
"date": "2020-07-24",
"value": -3799.2
},
{
"date": "2020-07-25",
"value": -3799.2
},
{
"date": "2020-07-26",
"value": -3799.2
},
{
"date": "2020-07-27",
"value": -3799.2
},
{
"date": "2020-07-28",
"value": -3799.2
},
{
"date": "2020-07-29",
"value": -3799.2
},
{
"date": "2020-07-30",
"value": -3799.2
},
{
"date": "2020-07-31",
"value": -3799.2
},
{
"date": "2020-08-01",
"value": -3799.2
},
{
"date": "2020-08-02",
"value": -3799.2
},
{
"date": "2020-08-03",
"value": -3799.2
},
{
"date": "2020-08-04",
"value": -3799.2
},
{
"date": "2020-08-05",
"value": -3799.2
},
{
"date": "2020-08-06",
"value": -3799.2
},
{
"date": "2020-08-07",
"value": -3799.2
},
{
"date": "2020-08-08",
"value": -3799.2
},
{
"date": "2020-08-09",
"value": -3799.2
},
{
"date": "2020-08-10",
"value": -3799.2
},
{
"date": "2020-08-11",
"value": -3799.2
},
{
"date": "2020-08-12",
"value": -3799.2
},
{
"date": "2020-08-13",
"value": -3799.2
},
{
"date": "2020-08-14",
"value": -3799.2
},
{
"date": "2020-08-15",
"value": -3799.2
},
{
"date": "2020-08-16",
"value": -3799.2
},
{
"date": "2020-08-17",
"value": -3799.2
},
{
"date": "2020-08-18",
"value": -3799.2
},
{
"date": "2020-08-19",
"value": -3799.2
},
{
"date": "2020-08-20",
"value": -3799.2
},
{
"date": "2020-08-21",
"value": -3799.2
},
{
"date": "2020-08-22",
"value": -7598.4
},
{
"date": "2020-08-23",
"value": -7598.4
},
{
"date": "2020-08-24",
"value": -7598.4
},
{
"date": "2020-08-25",
"value": -7598.4
},
{
"date": "2020-08-26",
"value": -7598.4
},
{
"date": "2020-08-27",
"value": -7598.4
},
{
"date": "2020-08-28",
"value": -8199.33
},
{
"date": "2020-08-29",
"value": -8199.33
},
{
"date": "2020-08-30",
"value": -8199.33
},
{
"date": "2020-08-31",
"value": -8199.33
},
{
"date": "2020-09-01",
"value": -8199.33
},
{
"date": "2020-09-02",
"value": -8199.33
},
{
"date": "2020-09-03",
"value": -9084.37
},
{
"date": "2020-09-04",
"value": -9084.37
},
{
"date": "2020-09-05",
"value": -10582.2
},
{
"date": "2020-09-06",
"value": 3080.3
},
{
"date": "2020-09-07",
"value": 3080.3
},
{
"date": "2020-09-08",
"value": 38080.3
},
{
"date": "2020-09-09",
"value": 38080.3
},
{
"date": "2020-09-10",
"value": 38080.3
},
{
"date": "2020-09-11",
"value": 38080.3
},
{
"date": "2020-09-12",
"value": 38080.3
},
{
"date": "2020-09-13",
"value": 36951.98
},
{
"date": "2020-09-14",
"value": 36951.98
},
{
"date": "2020-09-15",
"value": 36951.98
},
{
"date": "2020-09-16",
"value": 35683.81
},
{
"date": "2020-09-17",
"value": 35683.81
},
{
"date": "2020-09-18",
"value": 35683.81
},
{
"date": "2020-09-19",
"value": 35683.81
},
{
"date": "2020-09-20",
"value": 35683.81
},
{
"date": "2020-09-21",
"value": 35683.81
},
{
"date": "2020-09-22",
"value": 31460.83
},
{
"date": "2020-09-23",
"value": 31460.83
},
{
"date": "2020-09-24",
"value": -41714.74
},
{
"date": "2020-09-25",
"value": -41714.74
},
{
"date": "2020-09-26",
"value": -46472.74
},
{
"date": "2020-09-27",
"value": -46472.74
},
{
"date": "2020-09-28",
"value": -47073.67
},
{
"date": "2020-09-29",
"value": -47304.67
},
{
"date": "2020-09-30",
"value": -47304.67
},
{
"date": "2020-10-01",
"value": -45426.27
},
{
"date": "2020-10-02",
"value": -45426.27
},
{
"date": "2020-10-03",
"value": -45426.27
},
{
"date": "2020-10-04",
"value": -46924.1
},
{
"date": "2020-10-05",
"value": -46924.1
},
{
"date": "2020-10-06",
"value": -46924.1
},
{
"date": "2020-10-07",
"value": -44681.6
},
{
"date": "2020-10-08",
"value": -44931.6
},
{
"date": "2020-10-09",
"value": -44931.6
},
{
"date": "2020-10-10",
"value": -49422.08
},
{
"date": "2020-10-11",
"value": -50228.33
},
{
"date": "2020-10-12",
"value": -50228.33
},
{
"date": "2020-10-13",
"value": -49350.63
},
{
"date": "2020-10-14",
"value": -49350.63
},
{
"date": "2020-10-15",
"value": -49350.63
},
{
"date": "2020-10-16",
"value": -49350.63
},
{
"date": "2020-10-17",
"value": -49363.73
},
{
"date": "2020-10-18",
"value": -49363.73
},
{
"date": "2020-10-19",
"value": -49530.26
},
{
"date": "2020-10-20",
"value": -49530.26
},
{
"date": "2020-10-21",
"value": -49530.26
},
{
"date": "2020-10-22",
"value": -49530.26
},
{
"date": "2020-10-23",
"value": -53329.46
},
{
"date": "2020-10-24",
"value": -53329.46
},
{
"date": "2020-10-25",
"value": -53135.66
},
{
"date": "2020-10-26",
"value": -53135.66
},
{
"date": "2020-10-27",
"value": -77135.66
},
{
"date": "2020-10-28",
"value": -77736.59
},
{
"date": "2020-10-29",
"value": -77736.59
},
{
"date": "2020-10-30",
"value": -77736.59
},
{
"date": "2020-10-31",
"value": -80083.59
},
{
"date": "2020-11-01",
"value": -80105.19
},
{
"date": "2020-11-02",
"value": -78731.24
},
{
"date": "2020-11-03",
"value": -78731.24
},
{
"date": "2020-11-04",
"value": -78807.11
},
{
"date": "2020-11-05",
"value": -80304.94
},
{
"date": "2020-11-06",
"value": -80304.94
},
{
"date": "2020-11-07",
"value": -80304.94
},
{
"date": "2020-11-08",
"value": -80878.69
},
{
"date": "2020-11-09",
"value": -80925.64
},
{
"date": "2020-11-10",
"value": -81305.64
},
{
"date": "2020-11-11",
"value": -80512.64
},
{
"date": "2020-11-12",
"value": -80512.64
},
{
"date": "2020-11-13",
"value": -80512.64
},
{
"date": "2020-11-14",
"value": -80512.64
},
{
"date": "2020-11-15",
"value": -84057.43
},
{
"date": "2020-11-16",
"value": -84057.43
},
{
"date": "2020-11-17",
"value": -84057.43
},
{
"date": "2020-11-18",
"value": -87557.43
},
{
"date": "2020-11-19",
"value": -92356.43
},
{
"date": "2020-11-20",
"value": -92356.43
},
{
"date": "2020-11-21",
"value": -92356.43
},
{
"date": "2020-11-22",
"value": -91356.43
},
{
"date": "2020-11-23",
"value": -96317.73
},
{
"date": "2020-11-24",
"value": -96317.73
},
{
"date": "2020-11-25",
"value": -99317.73
},
{
"date": "2020-11-26",
"value": -99317.73
},
{
"date": "2020-11-27",
"value": -97797.73
},
{
"date": "2020-11-28",
"value": -98398.66
},
{
"date": "2020-11-29",
"value": -104547.66
},
{
"date": "2020-11-30",
"value": -104547.66
},
{
"date": "2020-12-01",
"value": -104569.26
},
{
"date": "2020-12-02",
"value": -98420.26
},
{
"date": "2020-12-03",
"value": -98420.26
},
{
"date": "2020-12-04",
"value": -98420.26
},
{
"date": "2020-12-05",
"value": -99974.01
},
{
"date": "2020-12-06",
"value": -99974.01
},
{
"date": "2020-12-07",
"value": -102775.87
},
{
"date": "2020-12-08",
"value": -102775.87
},
{
"date": "2020-12-09",
"value": -102776.47
},
{
"date": "2020-12-10",
"value": -100968.77
},
{
"date": "2020-12-11",
"value": -100708.77
},
{
"date": "2020-12-12",
"value": -101976.77
},
{
"date": "2020-12-13",
"value": -102356.77
},
{
"date": "2020-12-14",
"value": -102356.77
},
{
"date": "2020-12-15",
"value": -98721.17
},
{
"date": "2020-12-16",
"value": -105721.17
},
{
"date": "2020-12-17",
"value": -106065.66
},
{
"date": "2020-12-18",
"value": -106065.66
},
{
"date": "2020-12-19",
"value": -106065.66
},
{
"date": "2020-12-20",
"value": -106065.66
},
{
"date": "2020-12-21",
"value": -106066.41
},
{
"date": "2020-12-22",
"value": -109865.61
},
{
"date": "2020-12-23",
"value": -111027.71
},
{
"date": "2020-12-24",
"value": -111027.71
},
{
"date": "2020-12-25",
"value": -111027.71
},
{
"date": "2020-12-26",
"value": -112047.23
},
{
"date": "2020-12-27",
"value": -119641.23
},
{
"date": "2020-12-28",
"value": -120242.16
},
{
"date": "2020-12-29",
"value": -120242.16
},
{
"date": "2020-12-30",
"value": -120242.16
},
{
"date": "2020-12-31",
"value": -120242.16
},
{
"date": "2021-01-01",
"value": -120263.76
},
{
"date": "2021-01-02",
"value": -113563.76
},
{
"date": "2021-01-03",
"value": -113563.76
},
{
"date": "2021-01-04",
"value": -113563.76
},
{
"date": "2021-01-05",
"value": -121368.63
},
{
"date": "2021-01-06",
"value": -121368.63
},
{
"date": "2021-01-07",
"value": -121368.63
},
{
"date": "2021-01-08",
"value": -134820.63
},
{
"date": "2021-01-09",
"value": -134820.63
},
{
"date": "2021-01-10",
"value": -136590.81
},
{
"date": "2021-01-11",
"value": -136590.81
},
{
"date": "2021-01-12",
"value": -137875.81
},
{
"date": "2021-01-13",
"value": -137875.81
},
{
"date": "2021-01-14",
"value": -138125.81
},
{
"date": "2021-01-15",
"value": -138127.26
},
{
"date": "2021-01-16",
"value": -138127.26
},
{
"date": "2021-01-17",
"value": -138127.26
},
{
"date": "2021-01-18",
"value": -138127.26
},
{
"date": "2021-01-19",
"value": -138127.26
},
{
"date": "2021-01-20",
"value": -138127.26
},
{
"date": "2021-01-21",
"value": -136651.06
},
{
"date": "2021-01-22",
"value": -137813.91
},
{
"date": "2021-01-23",
"value": -137813.91
},
{
"date": "2021-01-24",
"value": -137813.91
},
{
"date": "2021-01-25",
"value": -138063.91
},
{
"date": "2021-01-26",
"value": -138063.91
},
{
"date": "2021-01-27",
"value": -138063.91
},
{
"date": "2021-01-28",
"value": -138063.91
},
{
"date": "2021-01-29",
"value": -138063.91
},
{
"date": "2021-01-30",
"value": -138063.91
},
{
"date": "2021-01-31",
"value": -138063.91
},
{
"date": "2021-02-01",
"value": -138085.51
},
{
"date": "2021-02-02",
"value": -138461.51
},
{
"date": "2021-02-03",
"value": -138461.51
},
{
"date": "2021-02-04",
"value": -138461.51
},
{
"date": "2021-02-05",
"value": -103517.43
},
{
"date": "2021-02-06",
"value": -104615.43
},
{
"date": "2021-02-07",
"value": -104615.43
},
{
"date": "2021-02-08",
"value": -104615.43
},
{
"date": "2021-02-09",
"value": -103615.43
},
{
"date": "2021-02-10",
"value": -103615.43
},
{
"date": "2021-02-11",
"value": -104340.11
},
{
"date": "2021-02-12",
"value": -105660.51
},
{
"date": "2021-02-13",
"value": -106805.81
},
{
"date": "2021-02-14",
"value": -115238.45
},
{
"date": "2021-02-15",
"value": -115238.45
},
{
"date": "2021-02-16",
"value": -115238.45
},
{
"date": "2021-02-17",
"value": -115238.45
},
{
"date": "2021-02-18",
"value": -115238.45
},
{
"date": "2021-02-19",
"value": -115238.45
},
{
"date": "2021-02-20",
"value": -115238.45
},
{
"date": "2021-02-21",
"value": -115238.45
},
{
"date": "2021-02-22",
"value": -115238.45
},
{
"date": "2021-02-23",
"value": -115241.95
},
{
"date": "2021-02-24",
"value": -554.95
},
{
"date": "2021-02-25",
"value": -554.95
},
{
"date": "2021-02-26",
"value": -4.95
},
{
"date": "2021-02-27",
"value": -4.95
},
{
"date": "2021-02-28",
"value": -4.95
},
{
"date": "2021-03-01",
"value": 3726.85
},
{
"date": "2021-03-02",
"value": 3726.85
},
{
"date": "2021-03-03",
"value": 3726.85
},
{
"date": "2021-03-04",
"value": 3726.85
},
{
"date": "2021-03-05",
"value": 3726.85
},
{
"date": "2021-03-06",
"value": 3726.85
},
{
"date": "2021-03-07",
"value": 3726.85
},
{
"date": "2021-03-08",
"value": 3670.93
},
{
"date": "2021-03-09",
"value": 3670.93
},
{
"date": "2021-03-10",
"value": 3670.93
},
{
"date": "2021-03-11",
"value": 6465.25
},
{
"date": "2021-03-12",
"value": 5180.25
},
{
"date": "2021-03-13",
"value": 7230.25
},
{
"date": "2021-03-14",
"value": 7230.25
},
{
"date": "2021-03-15",
"value": 7210.3
},
{
"date": "2021-03-16",
"value": 7210.3
},
{
"date": "2021-03-17",
"value": 7210.3
},
{
"date": "2021-03-18",
"value": 7892.58
},
{
"date": "2021-03-19",
"value": 7892.58
},
{
"date": "2021-03-20",
"value": 7892.58
},
{
"date": "2021-03-21",
"value": 7892.58
},
{
"date": "2021-03-22",
"value": 7892.58
},
{
"date": "2021-03-23",
"value": 7314.48
},
{
"date": "2021-03-24",
"value": 7314.48
},
{
"date": "2021-03-25",
"value": 7314.48
},
{
"date": "2021-03-26",
"value": 7314.48
},
{
"date": "2021-03-27",
"value": 7314.48
},
{
"date": "2021-03-28",
"value": 7314.48
},
{
"date": "2021-03-29",
"value": 8514.48
},
{
"date": "2021-03-30",
"value": 9977.53
},
{
"date": "2021-03-31",
"value": 9957.58
},
{
"date": "2021-04-01",
"value": 9935.98
},
{
"date": "2021-04-02",
"value": 9935.98
},
{
"date": "2021-04-03",
"value": 9935.98
},
{
"date": "2021-04-04",
"value": 8412.2
},
{
"date": "2021-04-05",
"value": 8412.2
},
{
"date": "2021-04-06",
"value": 8412.2
},
{
"date": "2021-04-07",
"value": 8412.2
},
{
"date": "2021-04-08",
"value": 72412.2
},
{
"date": "2021-04-09",
"value": 72378.25
},
{
"date": "2021-04-10",
"value": 72378.25
},
{
"date": "2021-04-11",
"value": 72378.25
},
{
"date": "2021-04-12",
"value": 71093.25
},
{
"date": "2021-04-13",
"value": 71093.25
},
{
"date": "2021-04-14",
"value": 71092.75
},
{
"date": "2021-04-15",
"value": 71092.75
},
{
"date": "2021-04-16",
"value": 136685.95
},
{
"date": "2021-04-17",
"value": 137062.13
},
{
"date": "2021-04-18",
"value": 140697.76
},
{
"date": "2021-04-19",
"value": 140697.76
},
{
"date": "2021-04-20",
"value": 140697.76
},
{
"date": "2021-04-21",
"value": 140088.75
},
{
"date": "2021-04-22",
"value": 140088.75
},
{
"date": "2021-04-23",
"value": 113926.65
},
{
"date": "2021-04-24",
"value": 113926.65
},
{
"date": "2021-04-25",
"value": 96958.65
},
{
"date": "2021-04-26",
"value": 96958.65
},
{
"date": "2021-04-27",
"value": 96958.65
},
{
"date": "2021-04-28",
"value": 96956.65
},
{
"date": "2021-04-29",
"value": 96956.65
},
{
"date": "2021-04-30",
"value": 96956.65
},
{
"date": "2021-05-01",
"value": 96956.65
},
{
"date": "2021-05-02",
"value": 96956.65
},
{
"date": "2021-05-03",
"value": 96956.65
},
{
"date": "2021-05-04",
"value": 95432.87
},
{
"date": "2021-05-05",
"value": 95432.87
},
{
"date": "2021-05-06",
"value": 95465.28
},
{
"date": "2021-05-07",
"value": 95465.28
},
{
"date": "2021-05-08",
"value": 95465.28
},
{
"date": "2021-05-09",
"value": 95465.28
},
{
"date": "2021-05-10",
"value": 94740.6
},
{
"date": "2021-05-11",
"value": 94740.6
},
{
"date": "2021-05-12",
"value": 94740.6
},
{
"date": "2021-05-13",
"value": 91405.6
},
{
"date": "2021-05-14",
"value": 91405.6
},
{
"date": "2021-05-15",
"value": 91405.6
},
{
"date": "2021-05-16",
"value": 91405.6
},
{
"date": "2021-05-17",
"value": 91405.6
}
]
}
Thanks to @NickODell's comment, the issue was solved as follows:
For the ARIMA/SARIMAX model, a random state should be created with a specific seed and then passed to the pm.auto_arima function. The reason is that pm.auto_arima uses a train_test_split function (which usually takes a seed) to find the best model.
# Dedicated RNG with a fixed seed, handed to the model search below so it
# does not depend on the global NumPy random state.
random_state = np.random.RandomState(123)

# Derive the series once and use the same array for both the order search
# and the final refit, so the two steps cannot see different inputs.
historical_data = df_historical["value"].to_numpy()

smodel = pm.auto_arima(
    historical_data,
    start_p=1,
    start_q=1,
    test="adf",
    max_p=3,
    max_q=3,
    m=12,
    start_P=0,
    seasonal=True,
    d=None,
    D=1,
    trace=True,
    error_action="ignore",
    suppress_warnings=True,
    stepwise=True,
    random_state=random_state,
)

# Refit with statsmodels using the orders selected by the search above.
mod = SARIMAX(
    historical_data,
    order=smodel.order,
    seasonal_order=smodel.seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
best_model = mod.fit(disp=0)
For the linear regression, the issue was solved by disabling intercept fitting: fitting the intercept involves a lot of averaging and shifting calculations, which caused differences in the predicted values between operating systems.
# Build the estimator with intercept fitting disabled and fit it in one
# expression; fit() hands back the estimator itself, so `regr` is the
# fitted model.
regr = LinearRegression(fit_intercept=False).fit(x, y)