Air Passenger benchmark¶
Example created by Wilson Rocha Lacerda Junior
Note¶
The following example is not intended to say that one library is better than another. The main focus of these examples is to show that SysIdentPy can be a good alternative for people looking to model time series.
We will compare the results obtained using the sktime and neural prophet library.
From sktime, the following models will be used:
AutoARIMA
BATS
TBATS
Exponential Smoothing
Prophet
AutoETS
For the sake of brevity, from SysIdentPy only the MetaMSS, AOLS, FROLS (with polynomial base function) and NARXNN methods will be used. See the SysIdentPy documentation to learn other ways of modeling with the library.
In [ ]:
Copied!
from warnings import simplefilter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal.signaltools
def _centered(arr, newsize):
# Return the center newsize portion of the array.
newsize = np.asarray(newsize)
currsize = np.array(arr.shape)
startind = (currsize - newsize) // 2
endind = startind + newsize
myslice = [slice(startind[k], endind[k]) for k in range(len(endind))]
return arr[tuple(myslice)]
scipy.signal.signaltools._centered = _centered
from sysidentpy.model_structure_selection import FROLS
from sysidentpy.model_structure_selection import AOLS
from sysidentpy.model_structure_selection import MetaMSS
from sysidentpy.basis_function import Polynomial
from sysidentpy.utils.plotting import plot_results
from torch import nn
# from sysidentpy.metrics import mean_squared_error
from sysidentpy.neural_network import NARXNN
from sktime.datasets import load_airline
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.arima import ARIMA, AutoARIMA
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.tbats import TBATS
from sktime.forecasting.bats import BATS
# from sktime.forecasting.model_evaluation import evaluate
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import mean_squared_error
from sktime.utils.plotting import plot_series
from neuralprophet import NeuralProphet
from neuralprophet import set_random_seed
simplefilter("ignore", FutureWarning)
np.seterr(all="ignore")
%matplotlib inline
loss = mean_squared_error
from warnings import simplefilter import matplotlib.pyplot as plt import numpy as np import pandas as pd import scipy.signal.signaltools def _centered(arr, newsize): # Return the center newsize portion of the array. newsize = np.asarray(newsize) currsize = np.array(arr.shape) startind = (currsize - newsize) // 2 endind = startind + newsize myslice = [slice(startind[k], endind[k]) for k in range(len(endind))] return arr[tuple(myslice)] scipy.signal.signaltools._centered = _centered from sysidentpy.model_structure_selection import FROLS from sysidentpy.model_structure_selection import AOLS from sysidentpy.model_structure_selection import MetaMSS from sysidentpy.basis_function import Polynomial from sysidentpy.utils.plotting import plot_results from torch import nn # from sysidentpy.metrics import mean_squared_error from sysidentpy.neural_network import NARXNN from sktime.datasets import load_airline from sktime.forecasting.ets import AutoETS from sktime.forecasting.arima import ARIMA, AutoARIMA from sktime.forecasting.base import ForecastingHorizon from sktime.forecasting.exp_smoothing import ExponentialSmoothing from sktime.forecasting.fbprophet import Prophet from sktime.forecasting.tbats import TBATS from sktime.forecasting.bats import BATS # from sktime.forecasting.model_evaluation import evaluate from sktime.forecasting.model_selection import temporal_train_test_split from sktime.performance_metrics.forecasting import mean_squared_error from sktime.utils.plotting import plot_series from neuralprophet import NeuralProphet from neuralprophet import set_random_seed simplefilter("ignore", FutureWarning) np.seterr(all="ignore") %matplotlib inline loss = mean_squared_error
Air passengers data¶
In [2]:
Copied!
y = load_airline()
y_train, y_test = temporal_train_test_split(y, test_size=23) # 23 samples for testing
plot_series(y_train, y_test, labels=["y_train", "y_test"])
fh = ForecastingHorizon(y_test.index, is_relative=False)
print(y_train.shape[0], y_test.shape[0])
y = load_airline() y_train, y_test = temporal_train_test_split(y, test_size=23) # 23 samples for testing plot_series(y_train, y_test, labels=["y_train", "y_test"]) fh = ForecastingHorizon(y_test.index, is_relative=False) print(y_train.shape[0], y_test.shape[0])
Results¶
No. | Package | Mean Squared Error |
---|---|---|
1 | SysIdentPy (Neural Model) | 316.54 |
2 | SysIdentPy (MetaMSS) | 450.99 |
3 | SysIdentPy (AOLS) | 476.64 |
4 | NeuralProphet | 501.24 |
5 | SysIdentPy (FROLS) | 805.95 |
6 | Exponential Smoothing | 910.52 |
7 | Prophet | 1186.00 |
8 | AutoArima | 1714.47 |
9 | Manual Arima | 2085.42 |
10 | ETS | 2590.05 |
11 | BATS | 7286.64 |
12 | TBATS | 7448.43 |
SysIdentPy FROLS¶
In [3]:
Copied!
y = load_airline()
y_train, y_test = temporal_train_test_split(y, test_size=23)
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)
basis_function = Polynomial(degree=1)
sysidentpy = FROLS(
order_selection=True,
ylag=13, # the lags for all models will be 13
basis_function=basis_function,
model_type="NAR",
)
sysidentpy.fit(y=y_train)
y_test = np.concatenate([y_train[-sysidentpy.max_lag :], y_test])
yhat = sysidentpy.predict(y=y_test, forecast_horizon=23)
frols_loss = loss(
pd.Series(y_test.flatten()[sysidentpy.max_lag :]),
pd.Series(yhat.flatten()[sysidentpy.max_lag :]),
)
print(frols_loss)
plot_results(y=y_test[sysidentpy.max_lag :], yhat=yhat[sysidentpy.max_lag :])
y = load_airline() y_train, y_test = temporal_train_test_split(y, test_size=23) y_train = y_train.values.reshape(-1, 1) y_test = y_test.values.reshape(-1, 1) basis_function = Polynomial(degree=1) sysidentpy = FROLS( order_selection=True, ylag=13, # the lags for all models will be 13 basis_function=basis_function, model_type="NAR", ) sysidentpy.fit(y=y_train) y_test = np.concatenate([y_train[-sysidentpy.max_lag :], y_test]) yhat = sysidentpy.predict(y=y_test, forecast_horizon=23) frols_loss = loss( pd.Series(y_test.flatten()[sysidentpy.max_lag :]), pd.Series(yhat.flatten()[sysidentpy.max_lag :]), ) print(frols_loss) plot_results(y=y_test[sysidentpy.max_lag :], yhat=yhat[sysidentpy.max_lag :])
SysIdentPy AOLS¶
In [4]:
Copied!
y = load_airline()
y_train, y_test = temporal_train_test_split(y, test_size=23)
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)
df_train, df_test = temporal_train_test_split(y, test_size=23)
df_train = df_train.reset_index()
df_train.columns = ["ds", "y"]
df_train["ds"] = pd.to_datetime(df_train["ds"].astype(str))
df_test = df_test.reset_index()
df_test.columns = ["ds", "y"]
df_test["ds"] = pd.to_datetime(df_test["ds"].astype(str))
sysidentpy_AOLS = AOLS(
ylag=13, k=2, L=1, model_type="NAR", basis_function=basis_function
)
sysidentpy_AOLS.fit(y=y_train)
y_test = np.concatenate([y_train[-sysidentpy_AOLS.max_lag :], y_test])
yhat = sysidentpy_AOLS.predict(y=y_test, steps_ahead=None, forecast_horizon=23)
aols_loss = loss(
pd.Series(y_test.flatten()[sysidentpy_AOLS.max_lag :]),
pd.Series(yhat.flatten()[sysidentpy_AOLS.max_lag :]),
)
print(aols_loss)
plot_results(y=y_test[sysidentpy_AOLS.max_lag :], yhat=yhat[sysidentpy_AOLS.max_lag :])
y = load_airline() y_train, y_test = temporal_train_test_split(y, test_size=23) y_train = y_train.values.reshape(-1, 1) y_test = y_test.values.reshape(-1, 1) df_train, df_test = temporal_train_test_split(y, test_size=23) df_train = df_train.reset_index() df_train.columns = ["ds", "y"] df_train["ds"] = pd.to_datetime(df_train["ds"].astype(str)) df_test = df_test.reset_index() df_test.columns = ["ds", "y"] df_test["ds"] = pd.to_datetime(df_test["ds"].astype(str)) sysidentpy_AOLS = AOLS( ylag=13, k=2, L=1, model_type="NAR", basis_function=basis_function ) sysidentpy_AOLS.fit(y=y_train) y_test = np.concatenate([y_train[-sysidentpy_AOLS.max_lag :], y_test]) yhat = sysidentpy_AOLS.predict(y=y_test, steps_ahead=None, forecast_horizon=23) aols_loss = loss( pd.Series(y_test.flatten()[sysidentpy_AOLS.max_lag :]), pd.Series(yhat.flatten()[sysidentpy_AOLS.max_lag :]), ) print(aols_loss) plot_results(y=y_test[sysidentpy_AOLS.max_lag :], yhat=yhat[sysidentpy_AOLS.max_lag :])
SysIdentPy MetaMSS¶
In [34]:
Copied!
set_random_seed(42)
y = load_airline()
y_train, y_test, y_validation, _ = np.split(y, [100, 121, 144])
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)
y_validation = y_validation.values.reshape(-1, 1)
sysidentpy_metamss = MetaMSS(basis_function=basis_function, ylag=13, model_type="NAR")
sysidentpy_metamss.fit(y=y_train, y_test=y_test)
y_validation = np.concatenate([y_test[-sysidentpy_metamss.max_lag :], y_validation])
yhat = sysidentpy_metamss.predict(y=y_validation, steps_ahead=None, forecast_horizon=23)
metamss_loss = loss(
pd.Series(y_validation.flatten()[sysidentpy_metamss.max_lag :]),
pd.Series(yhat.flatten()[sysidentpy_metamss.max_lag :]),
)
print(metamss_loss)
plot_results(
y=y_validation[sysidentpy_metamss.max_lag :],
yhat=yhat[sysidentpy_metamss.max_lag :],
)
set_random_seed(42) y = load_airline() y_train, y_test, y_validation, _ = np.split(y, [100, 121, 144]) y_train = y_train.values.reshape(-1, 1) y_test = y_test.values.reshape(-1, 1) y_validation = y_validation.values.reshape(-1, 1) sysidentpy_metamss = MetaMSS(basis_function=basis_function, ylag=13, model_type="NAR") sysidentpy_metamss.fit(y=y_train, y_test=y_test) y_validation = np.concatenate([y_test[-sysidentpy_metamss.max_lag :], y_validation]) yhat = sysidentpy_metamss.predict(y=y_validation, steps_ahead=None, forecast_horizon=23) metamss_loss = loss( pd.Series(y_validation.flatten()[sysidentpy_metamss.max_lag :]), pd.Series(yhat.flatten()[sysidentpy_metamss.max_lag :]), ) print(metamss_loss) plot_results( y=y_validation[sysidentpy_metamss.max_lag :], yhat=yhat[sysidentpy_metamss.max_lag :], )