Meta-Model Structure Selection (MetaMSS) algorithm for building Polynomial NARX modelsΒΆ

Example created by Wilson Rocha Lacerda Junior

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sysidentpy.model_structure_selection import MetaMSS, FROLS
from sysidentpy.metrics import root_relative_squared_error
from sysidentpy.basis_function._basis_function import Polynomial
from sysidentpy.utils.display_results import results
from sysidentpy.utils.plotting import plot_residues_correlation, plot_results
from sysidentpy.residues.residues_correlation import compute_residues_autocorrelation, compute_cross_correlation
df1 = pd.read_csv('examples/datasets/x_cc.csv')
df2 = pd.read_csv('examples/datasets/y_cc.csv')

df2[5000:80000].plot(figsize=(10, 4))
<AxesSubplot:>
../_images/metamss_2_11.png
# we will decimate the data using d=500 in this example
x_train, x_test = np.split(df1.iloc[::500].values, 2)
y_train, y_test = np.split(df2.iloc[::500].values, 2)
basis_function = Polynomial(degree=2)

model = MetaMSS(
    norm=-2,
    xlag=3,
    ylag=3,
    estimator="recursive_least_squares",
    k_agents_percent=10,
    estimate_parameter=True,
    maxiter=30,
    n_agents=10,
    loss_func='metamss_loss',
    basis_function=basis_function
)
model.fit(X_train=x_train, y_train=y_train, X_test=x_test, y_test=y_test)
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\narmax_base.py:796: RuntimeWarning: overflow encountered in power
  np.power(raw_regressor, model_exponents[j])
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\numpy\core\fromnumeric.py:87: RuntimeWarning: overflow encountered in reduce
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\model_structure_selection\meta_model_structure_selection.py:421: RuntimeWarning: overflow encountered in square
  sum_of_squared_residues = np.sum(residues ** 2)
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\numpy\core\fromnumeric.py:87: RuntimeWarning: invalid value encountered in reduce
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\metrics\_regression.py:215: RuntimeWarning: overflow encountered in square
  numerator = np.sum(np.square((y_predicted - y)))
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\metrics\_regression.py:216: RuntimeWarning: overflow encountered in square
  denominator = np.sum(np.square((y_predicted - np.mean(y, axis=0))))
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\numpy\linalg\linalg.py:2567: RuntimeWarning: divide by zero encountered in power
  absx **= ord
<sysidentpy.model_structure_selection.meta_model_structure_selection.MetaMSS at 0x284fd469a90>
yhat = model.predict(X_test=x_test, y_test=y_test)
rrse = root_relative_squared_error(y_test, yhat)
print(rrse)

r = pd.DataFrame(
    results(
        model.final_model, model.theta, model.err,
        model.n_terms, err_precision=8, dtype='sci'
        ),
    columns=['Regressors', 'Parameters', 'ERR'])
print(r)

plot_results(y=y_test, yhat = yhat, n=1000)
ee = compute_residues_autocorrelation(y_test, yhat)
plot_residues_correlation(data=ee, title="Residues", ylabel="$e^2$")
x1e = compute_cross_correlation(y_test, yhat, x_test)
plot_residues_correlation(data=x1e, title="Residues", ylabel="$x_1e$")
0.042484059098656546
        Regressors   Parameters             ERR
0           y(k-1)   1.1373E+00  0.00000000E+00
1           y(k-3)  -3.5811E-01  0.00000000E+00
2          x1(k-1)   5.8621E+02  0.00000000E+00
3          x1(k-2)   7.3266E-02  0.00000000E+00
4          x1(k-3)   1.1304E+01  0.00000000E+00
5         y(k-1)^2  -4.9046E-05  0.00000000E+00
6     y(k-3)y(k-1)   1.3034E-04  0.00000000E+00
7    x1(k-1)y(k-1)  -1.6871E-01  0.00000000E+00
8    x1(k-2)y(k-1)   1.0762E-01  0.00000000E+00
9    x1(k-3)y(k-1)  -1.6445E-01  0.00000000E+00
10        y(k-2)^2  -4.6619E-05  0.00000000E+00
11   x1(k-1)y(k-2)   9.8404E-02  0.00000000E+00
12   x1(k-2)y(k-2)  -9.8212E-02  0.00000000E+00
13   x1(k-3)y(k-2)   1.5368E-01  0.00000000E+00
14        y(k-3)^2  -1.3976E-05  0.00000000E+00
15   x1(k-1)y(k-3)  -1.9331E-02  0.00000000E+00
16   x1(k-3)y(k-3)  -5.1422E-02  0.00000000E+00
17  x1(k-3)x1(k-1)   4.4025E+00  0.00000000E+00
18       x1(k-2)^2   3.6633E-01  0.00000000E+00
19  x1(k-3)x1(k-2)   8.7208E+00  0.00000000E+00
20       x1(k-3)^2   5.6522E+01  0.00000000E+00
../_images/metamss_5_11.png ../_images/metamss_5_2.png ../_images/metamss_5_3.png
# Ploting the evolution of the agents
plt.plot(model.best_by_iter)
model.best_by_iter[-1]
0.005137296582679524
../_images/metamss_6_11.png
# You have access to all tested models
# model.tested_models
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostRegressor
from sklearn.linear_model import BayesianRidge, ARDRegression
from sysidentpy.general_estimators import NARX

xlag = ylag = 10

estimators = [
    ('NARX_KNeighborsRegressor', NARX(
        base_estimator=KNeighborsRegressor(),
        xlag=xlag,
        ylag=ylag)),
    ('NARX_DecisionTreeRegressor', NARX(
        base_estimator=DecisionTreeRegressor(),
        xlag=xlag,
        ylag=ylag)),
    ('NARX_RandomForestRegressor', NARX(
        base_estimator=RandomForestRegressor(
            n_estimators=200),
        xlag=xlag,
        ylag=ylag,
    )),
    ('NARX_Catboost', NARX(
        base_estimator=CatBoostRegressor(
        iterations=800,
        learning_rate=0.1,
        depth=8),
        xlag=xlag,
        ylag=ylag,
        non_degree=1,
        fit_params={'verbose': False}
    )),
    ('NARX_ARD', NARX(
        base_estimator=ARDRegression(),
        xlag=xlag,
        ylag=ylag,
        non_degree=2
    )),
    ('FROLS-Polynomial_NARX', FROLS(
        order_selection=True,
        n_info_values=50,
        extended_least_squares=False,
        ylag=ylag, xlag=xlag,
        info_criteria='bic',
        estimator='recursive_least_squares',
        basis_function=basis_function
        )
    ),
    ('MetaMSS', MetaMSS(norm=-2,
                xlag=xlag,
                ylag=ylag,
                estimator="recursive_least_squares",
                k_agents_percent=10,
                estimate_parameter=True,
                maxiter=20,
                n_agents=15,
                loss_func='metamss_loss',
                basis_function=basis_function)
    )
    
    ]


resultados = {}
for nome_do_modelo, modelo in estimators:
    resultados['%s' % (nome_do_modelo)] = []
    if nome_do_modelo == 'MetaMSS':
        modelo.fit(X_train=x_train, y_train=y_train, X_test=x_test, y_test=y_test)
        yhat = modelo.predict(X_test=x_test,  y_test=y_test)
    else:
        modelo.fit(X=x_train, y=y_train)
        yhat = modelo.predict(X=x_test,  y=y_test)
    if nome_do_modelo in ['MetaMSS', 'FROLS-Polynomial_NARX']:
        result = root_relative_squared_error(y_test[modelo.max_lag:], yhat[modelo.max_lag:])
    else:
        result = root_relative_squared_error(y_test, yhat)
    resultados['%s' % (nome_do_modelo)].append(result)
    print(nome_do_modelo, '%.3f' % np.mean(result))
10-14 20:43:29 - INFO - Training the model
10-14 20:43:29 - INFO - Creating the regressor matrix
10-14 20:43:29 - INFO - The regressor matrix have 21 features
10-14 20:43:29 - INFO - Done! Model is built!
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\utils\deprecation.py:27: FutureWarning: Function __init__ has been deprecated since v0.1.7.
 Use NARXNN(ylag=2, xlag=2, basis_function='Some basis function') instead.This module was deprecated in favor of NARXNN(ylag=2, xlag=2, basis_function='Some basis function') module into which all the refactored classes and functions are moved.
 This feature will be removed in version v0.2.0.
  warnings.warn(message, FutureWarning)
NARX_KNeighborsRegressor 1.833
10-14 20:43:29 - INFO - Training the model
10-14 20:43:29 - INFO - Creating the regressor matrix
10-14 20:43:29 - INFO - The regressor matrix have 21 features
10-14 20:43:29 - INFO - Done! Model is built!
NARX_DecisionTreeRegressor 0.277
10-14 20:43:29 - INFO - Training the model
10-14 20:43:29 - INFO - Creating the regressor matrix
10-14 20:43:29 - INFO - The regressor matrix have 21 features
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\general_estimators\narx.py:161: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  self.base_estimator.fit(X, y, **self.fit_params)
10-14 20:43:30 - INFO - Done! Model is built!
NARX_RandomForestRegressor 0.194
10-14 20:43:34 - INFO - Training the model
10-14 20:43:34 - INFO - Creating the regressor matrix
10-14 20:43:34 - INFO - The regressor matrix have 21 features
10-14 20:43:36 - INFO - Done! Model is built!
NARX_Catboost 0.179
10-14 20:43:37 - INFO - Training the model
10-14 20:43:37 - INFO - Creating the regressor matrix
10-14 20:43:37 - INFO - The regressor matrix have 231 features
10-14 20:43:37 - INFO - Done! Model is built!
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\sklearn\utils\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)
NARX_ARD 0.074
FROLS-Polynomial_NARX 0.047
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\numpy\core\fromnumeric.py:87: RuntimeWarning: overflow encountered in reduce
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\narmax_base.py:796: RuntimeWarning: overflow encountered in power
  np.power(raw_regressor, model_exponents[j])
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\model_structure_selection\meta_model_structure_selection.py:421: RuntimeWarning: overflow encountered in square
  sum_of_squared_residues = np.sum(residues ** 2)
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\model_structure_selection\meta_model_structure_selection.py:431: RuntimeWarning: invalid value encountered in sqrt
  se_theta = np.sqrt(var_e)
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\numpy\core\fromnumeric.py:87: RuntimeWarning: invalid value encountered in reduce
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\metrics\_regression.py:215: RuntimeWarning: overflow encountered in square
  numerator = np.sum(np.square((y_predicted - y)))
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\metrics\_regression.py:216: RuntimeWarning: overflow encountered in square
  denominator = np.sum(np.square((y_predicted - np.mean(y, axis=0))))
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\numpy\linalg\linalg.py:2567: RuntimeWarning: divide by zero encountered in power
  absx **= ord
MetaMSS 0.030
for aux_results, results in sorted(resultados.items(), key=lambda x: np.mean(x[1]), reverse=False):
    print(aux_results, np.mean(results))
MetaMSS 0.03020874227035109
FROLS-Polynomial_NARX 0.04663897799085836
NARX_ARD 0.07413356855178779
NARX_Catboost 0.17923040407121107
NARX_RandomForestRegressor 0.19361475059211708
NARX_DecisionTreeRegressor 0.2765294836744295
NARX_KNeighborsRegressor 1.833370478725381