Identification of an electromechanical system

Example created by Wilson Rocha Lacerda Junior

More details about this data can be found in the following paper (in Portuguese): https://www.researchgate.net/publication/320418710_Identificacao_de_um_motorgerador_CC_por_meio_de_modelos_polinomiais_autorregressivos_e_redes_neurais_artificiais

pip install sysidentpy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sysidentpy.model_structure_selection import FROLS
from sysidentpy.basis_function._basis_function import Polynomial
from sysidentpy.metrics import root_relative_squared_error
from sysidentpy.utils.generate_data import get_siso_data
from sysidentpy.utils.display_results import results
from sysidentpy.utils.plotting import plot_residues_correlation, plot_results
from sysidentpy.residues.residues_correlation import compute_residues_autocorrelation, compute_cross_correlation
df1 = pd.read_csv('examples/datasets/x_cc.csv')
df2 = pd.read_csv('examples/datasets/y_cc.csv')
df2[5000:80000].plot(figsize=(10, 4))
[Figure: measured output signal (df2), samples 5000 to 80000]
# Decimate the data by a factor of d=500 and split the decimated
# series into identification (train) and validation halves
x_train, x_valid = np.split(df1.iloc[::500].values, 2)
y_train, y_valid = np.split(df2.iloc[::500].values, 2)
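
A quick shape check helps confirm the split: np.split divides the decimated series into two equal halves and raises a ValueError if the length is not evenly divisible, so both sets should contain the same number of samples.

# Sanity check: identification and validation sets should have the same size
print(x_train.shape, x_valid.shape)
print(y_train.shape, y_valid.shape)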

Building a Polynomial NARX model

basis_function = Polynomial(degree=2)

model = FROLS(
    order_selection=True,  # choose the number of terms with an information criterion
    n_info_values=40,  # number of candidate model sizes evaluated by the criterion
    extended_least_squares=False,
    ylag=2, xlag=2,  # maximum output and input lags
    info_criteria='bic',
    estimator='recursive_least_squares',  # parameter estimation algorithm
    basis_function=basis_function
)
model.fit(X=x_train, y=y_train)
yhat = model.predict(X=x_valid, y=y_valid)
rrse = root_relative_squared_error(y_valid, yhat)
print(rrse)

r = pd.DataFrame(
    results(
        model.final_model, model.theta, model.err,
        model.n_terms, err_precision=8, dtype='sci'
    ),
    columns=['Regressors', 'Parameters', 'ERR'])
print(r)

plot_results(y=y_valid, yhat=yhat, n=1000)
ee = compute_residues_autocorrelation(y_valid, yhat)
plot_residues_correlation(data=ee, title="Residues", ylabel="$e^2$")
x1e = compute_cross_correlation(y_valid, yhat, x_valid)
plot_residues_correlation(data=x1e, title="Residues", ylabel="$x_1e$")
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\model_structure_selection\forward_regression_orthogonal_least_squares.py:472: UserWarning: n_info_values is greater than the maximum number of all regressors space considering the chosen y_lag, u_lag, and non_degree. We set as 15 
  self.info_values = self.information_criterion(reg_matrix, y)
0.08011571455967419
       Regressors   Parameters             ERR
0          y(k-1)   1.3016E+00  9.86000384E-01
1       x1(k-1)^2   1.0393E+02  7.94805130E-03
2        y(k-2)^2   1.6288E-05  2.50905908E-03
3   x1(k-1)y(k-1)  -1.2567E-01  1.43301039E-03
4          y(k-2)  -5.0784E-01  1.02781443E-03
5   x1(k-1)y(k-2)   5.6049E-02  5.35200312E-04
6         x1(k-2)   3.4986E+02  2.79648078E-04
7   x1(k-2)y(k-1)  -8.4030E-02  1.12211942E-04
8  x1(k-2)x1(k-1)  -7.8186E+00  4.54743448E-05
9   x1(k-2)y(k-2)   3.4050E-02  3.25346101E-05
[Figures: free run simulation vs. measured output; residual autocorrelation; cross-correlation between input and residuals]
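
Since order_selection=True, FROLS computes the chosen information criterion (BIC here) for each candidate number of terms; the warning above shows the 40 requested values were capped at 15, the size of the full regressor space for ylag=xlag=2 and a degree-2 polynomial. A minimal sketch to inspect that curve, assuming the fitted model exposes the computed values as model.info_values:

# Sketch: plot the BIC values computed during order selection
# (assumes the fitted FROLS model stores them in `model.info_values`)
n_values = len(model.info_values)
plt.plot(np.arange(1, n_values + 1), model.info_values)
plt.xlabel("Number of model terms")
plt.ylabel("BIC")
plt.title("Information criterion vs. model size")
plt.show()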

Testing different autoregressive models

from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import ARDRegression
from sysidentpy.general_estimators import NARX

basis_function = Polynomial(degree=2)


estimators = [
    ('KNeighborsRegressor', NARX(
        base_estimator=KNeighborsRegressor(),
        xlag=10,
        ylag=10)),
    ('NARX-DecisionTreeRegressor', NARX(
        base_estimator=DecisionTreeRegressor(),
        xlag=10,
        ylag=10)),
    ('NARX-RandomForestRegressor', NARX(
        base_estimator=RandomForestRegressor(
            n_estimators=200),
        xlag=10,
        ylag=10,
    )),
    ('NARX-Catboost', NARX(
        base_estimator=CatBoostRegressor(
        iterations=800,
        learning_rate=0.1,
        depth=8),
        xlag=10,
        ylag=10,
        non_degree=1,
        fit_params={'verbose': False}
    )),
    ('NARX-ARD', NARX(
        base_estimator=ARDRegression(),
        xlag=10,
        ylag=10,
        non_degree=2
    )),
    ('FROLS-Polynomial_NARX', FROLS(
        order_selection=True,
        n_info_values=50,
        extended_least_squares=False,
        ylag=10, xlag=10,
        info_criteria='bic',
        estimator='recursive_least_squares',
        basis_function=basis_function
        )
    ),
]

resultados = {}
for nome_do_modelo, modelo in estimators:
    resultados[nome_do_modelo] = []
    modelo.fit(X=x_train, y=y_train)
    yhat = modelo.predict(X=x_valid, y=y_valid)
    result = root_relative_squared_error(y_valid[modelo.max_lag:], yhat[modelo.max_lag:])
    resultados[nome_do_modelo].append(result)
    print(nome_do_modelo, '%.3f' % np.mean(result))
10-14 20:35:42 - INFO - Training the model
10-14 20:35:42 - INFO - Creating the regressor matrix
10-14 20:35:42 - INFO - The regressor matrix have 21 features
10-14 20:35:42 - INFO - Done! Model is built!
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\utils\deprecation.py:27: FutureWarning: Function __init__ has been deprecated since v0.1.7.
 Use NARXNN(ylag=2, xlag=2, basis_function='Some basis function') instead.This module was deprecated in favor of NARXNN(ylag=2, xlag=2, basis_function='Some basis function') module into which all the refactored classes and functions are moved.
 This feature will be removed in version v0.2.0.
  warnings.warn(message, FutureWarning)
KNeighborsRegressor 1.871
10-14 20:35:42 - INFO - Training the model
10-14 20:35:42 - INFO - Creating the regressor matrix
10-14 20:35:42 - INFO - The regressor matrix have 21 features
10-14 20:35:42 - INFO - Done! Model is built!
NARX-DecisionTreeRegressor 0.137
10-14 20:35:43 - INFO - Training the model
10-14 20:35:43 - INFO - Creating the regressor matrix
10-14 20:35:43 - INFO - The regressor matrix have 21 features
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\general_estimators\narx.py:161: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  self.base_estimator.fit(X, y, **self.fit_params)
10-14 20:35:43 - INFO - Done! Model is built!
NARX-RandomForestRegressor 0.167
10-14 20:35:47 - INFO - Training the model
10-14 20:35:47 - INFO - Creating the regressor matrix
10-14 20:35:47 - INFO - The regressor matrix have 21 features
10-14 20:35:49 - INFO - Done! Model is built!
NARX-Catboost 0.182
10-14 20:35:50 - INFO - Training the model
10-14 20:35:50 - INFO - Creating the regressor matrix
10-14 20:35:50 - INFO - The regressor matrix have 231 features
10-14 20:35:50 - INFO - Done! Model is built!
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\sklearn\utils\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)
NARX-ARD 0.075
FROLS-Polynomial_NARX 0.047
for nome_do_modelo, scores in sorted(resultados.items(), key=lambda x: np.mean(x[1])):
    print(nome_do_modelo, np.mean(scores))
FROLS-Polynomial_NARX 0.04663897799085836
NARX-ARD 0.07507698375814373
NARX-DecisionTreeRegressor 0.1370352822444405
NARX-RandomForestRegressor 0.16746617689296395
NARX-Catboost 0.1818567377511571
KNeighborsRegressor 1.8710938813353852
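
A bar chart of the mean RRSE per estimator gives a quick visual comparison of the values stored in resultados; a minimal sketch using the matplotlib import from the beginning of the example:

# Sketch: bar chart of the mean RRSE obtained by each estimator
names = list(resultados.keys())
mean_rrse = [np.mean(v) for v in resultados.values()]
plt.figure(figsize=(10, 4))
plt.bar(names, mean_rrse)
plt.ylabel("RRSE")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()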