Identification of an electromechanical system

Example created by Wilson Rocha Lacerda Junior

More details about this data can be found in the following paper (in Portuguese): https://www.researchgate.net/publication/320418710_Identificacao_de_um_motorgerador_CC_por_meio_de_modelos_polinomiais_autorregressivos_e_redes_neurais_artificiais

pip install sysidentpy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sysidentpy.model_structure_selection import FROLS
from sysidentpy.basis_function._basis_function import Polynomial
from sysidentpy.metrics import root_relative_squared_error
from sysidentpy.utils.generate_data import get_siso_data
from sysidentpy.utils.display_results import results
from sysidentpy.utils.plotting import plot_residues_correlation, plot_results
from sysidentpy.residues.residues_correlation import compute_residues_autocorrelation, compute_cross_correlation
df1 = pd.read_csv('examples/datasets/x_cc.csv')
df2 = pd.read_csv('examples/datasets/y_cc.csv')
df2[5000:80000].plot(figsize=(10, 4))
[Figure: plot of the measured output signal (df2), samples 5000 to 80000]
# decimate the data by keeping every 500th sample (d = 500)
# and split each signal into identification and validation halves
x_train, x_valid = np.split(df1.iloc[::500].values, 2)
y_train, y_valid = np.split(df2.iloc[::500].values, 2)
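Note that the slicing above keeps every 500th raw sample without any low-pass filtering. If aliasing is a concern, a minimal sketch of a filtered alternative is shown below; it assumes scipy is installed, the filter defaults are only illustrative, and the rest of this example keeps the plain-sliced x_train/y_train.

from scipy.signal import resample_poly

# low-pass filter and downsample by a factor of 500 instead of plain slicing
x_dec = resample_poly(df1.values.ravel(), up=1, down=500).reshape(-1, 1)
y_dec = resample_poly(df2.values.ravel(), up=1, down=500).reshape(-1, 1)

# split into identification and validation halves (column vectors, as sysidentpy expects)
x_train_f, x_valid_f = np.array_split(x_dec, 2)
y_train_f, y_valid_f = np.array_split(y_dec, 2)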

Building a Polynomial NARX model

basis_function = Polynomial(degree=2)

model = FROLS(
    order_selection=True,
    n_info_values=40,
    extended_least_squares=False,
    ylag=2, xlag=2,
    info_criteria='bic',
    estimator='recursive_least_squares',
    basis_function=basis_function
)
model.fit(X=x_train, y=y_train)
yhat = model.predict(X=x_valid, y=y_valid)
rrse = root_relative_squared_error(y_valid, yhat)
print(rrse)

r = pd.DataFrame(
    results(
        model.final_model, model.theta, model.err,
        model.n_terms, err_precision=8, dtype='sci'
        ),
    columns=['Regressors', 'Parameters', 'ERR'])
print(r)

plot_results(y=y_valid, yhat=yhat, n=1000)
ee = compute_residues_autocorrelation(y_valid, yhat)
plot_residues_correlation(data=ee, title="Residues", ylabel="$e^2$")
x1e = compute_cross_correlation(y_valid, yhat, x_valid)
plot_residues_correlation(data=x1e, title="Residues", ylabel="$x_1e$")
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\model_structure_selection\forward_regression_orthogonal_least_squares.py:472: UserWarning: n_info_values is greater than the maximum number of all regressors space considering the chosen y_lag, u_lag, and non_degree. We set as 15 
  self.info_values = self.information_criterion(reg_matrix, y)
0.08011571455967419
       Regressors   Parameters             ERR
0          y(k-1)   1.3016E+00  9.86000384E-01
1       x1(k-1)^2   1.0393E+02  7.94805130E-03
2        y(k-2)^2   1.6288E-05  2.50905908E-03
3   x1(k-1)y(k-1)  -1.2567E-01  1.43301039E-03
4          y(k-2)  -5.0784E-01  1.02781443E-03
5   x1(k-1)y(k-2)   5.6049E-02  5.35200312E-04
6         x1(k-2)   3.4986E+02  2.79648078E-04
7   x1(k-2)y(k-1)  -8.4030E-02  1.12211942E-04
8  x1(k-2)x1(k-1)  -7.8186E+00  4.54743448E-05
9   x1(k-2)y(k-2)   3.4050E-02  3.25346101E-05
[Figures: free-run prediction vs. measured output, residual autocorrelation, and input-residual cross-correlation]
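Because order_selection=True, the information-criterion values computed during the term search are stored in model.info_values (the attribute named in the warning above; the search was capped at the 15 candidate regressors available for ylag=xlag=2 and degree 2). A minimal sketch, assuming one criterion value per candidate model size, to visualize how the BIC evolves with the number of terms:

# plot the information criterion against the number of regressors kept in the model
n_evaluated = len(model.info_values)
plt.plot(np.arange(1, n_evaluated + 1), model.info_values)
plt.xlabel('Number of terms')
plt.ylabel('Information criterion (BIC)')
plt.title('Model order selection')
plt.show()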

Testing different autoregressive models

Here we wrap several regressors from scikit-learn and CatBoost with sysidentpy's general NARX estimator and compare them, using the RRSE metric, against the polynomial NARX model obtained with FROLS.

from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ARDRegression
from catboost import CatBoostRegressor
from sysidentpy.general_estimators import NARX

basis_function = Polynomial(degree=2)


estimators = [
    ('KNeighborsRegressor', NARX(
        base_estimator=KNeighborsRegressor(),
        xlag=10,
        ylag=10)),
    ('NARX-DecisionTreeRegressor', NARX(
        base_estimator=DecisionTreeRegressor(),
        xlag=10,
        ylag=10)),
    ('NARX-RandomForestRegressor', NARX(
        base_estimator=RandomForestRegressor(
            n_estimators=200),
        xlag=10,
        ylag=10,
    )),
    ('NARX-Catboost', NARX(
        base_estimator=CatBoostRegressor(
            iterations=800,
            learning_rate=0.1,
            depth=8),
        xlag=10,
        ylag=10,
        non_degree=1,
        fit_params={'verbose': False}
    )),
    ('NARX-ARD', NARX(
        base_estimator=ARDRegression(),
        xlag=10,
        ylag=10,
        non_degree=2
    )),
    ('FROLS-Polynomial_NARX', FROLS(
        order_selection=True,
        n_info_values=50,
        extended_least_squares=False,
        ylag=10, xlag=10,
        info_criteria='bic',
        estimator='recursive_least_squares',
        basis_function=basis_function
        )
    ),
]

resultados = {}
for nome_do_modelo, modelo in estimators:
    # fit each estimator and evaluate its free-run prediction on the validation data
    resultados[nome_do_modelo] = []
    modelo.fit(X=x_train, y=y_train)
    yhat = modelo.predict(X=x_valid, y=y_valid)
    result = root_relative_squared_error(
        y_valid[modelo.max_lag:], yhat[modelo.max_lag:]
    )
    resultados[nome_do_modelo].append(result)
    print(nome_do_modelo, '%.3f' % np.mean(result))
10-14 20:35:42 - INFO - Training the model
10-14 20:35:42 - INFO - Creating the regressor matrix
10-14 20:35:42 - INFO - The regressor matrix have 21 features
10-14 20:35:42 - INFO - Done! Model is built!
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\utils\deprecation.py:27: FutureWarning: Function __init__ has been deprecated since v0.1.7.
 Use NARXNN(ylag=2, xlag=2, basis_function='Some basis function') instead.This module was deprecated in favor of NARXNN(ylag=2, xlag=2, basis_function='Some basis function') module into which all the refactored classes and functions are moved.
 This feature will be removed in version v0.2.0.
  warnings.warn(message, FutureWarning)
KNeighborsRegressor 1.871
10-14 20:35:42 - INFO - Training the model
10-14 20:35:42 - INFO - Creating the regressor matrix
10-14 20:35:42 - INFO - The regressor matrix have 21 features
10-14 20:35:42 - INFO - Done! Model is built!
NARX-DecisionTreeRegressor 0.137
10-14 20:35:43 - INFO - Training the model
10-14 20:35:43 - INFO - Creating the regressor matrix
10-14 20:35:43 - INFO - The regressor matrix have 21 features
c:\Users\wilso\Desktop\projects\GitHub\sysidentpy\sysidentpy\general_estimators\narx.py:161: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  self.base_estimator.fit(X, y, **self.fit_params)
10-14 20:35:43 - INFO - Done! Model is built!
NARX-RandomForestRegressor 0.167
10-14 20:35:47 - INFO - Training the model
10-14 20:35:47 - INFO - Creating the regressor matrix
10-14 20:35:47 - INFO - The regressor matrix have 21 features
10-14 20:35:49 - INFO - Done! Model is built!
NARX-Catboost 0.182
10-14 20:35:50 - INFO - Training the model
10-14 20:35:50 - INFO - Creating the regressor matrix
10-14 20:35:50 - INFO - The regressor matrix have 231 features
10-14 20:35:50 - INFO - Done! Model is built!
C:\Users\wilso\miniconda3\envs\sysidentpy\lib\site-packages\sklearn\utils\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)
NARX-ARD 0.075
FROLS-Polynomial_NARX 0.047
for nome_do_modelo, scores in sorted(resultados.items(), key=lambda x: np.mean(x[1])):
    print(nome_do_modelo, np.mean(scores))
FROLS-Polynomial_NARX 0.04663897799085836
NARX-ARD 0.07507698375814373
NARX-DecisionTreeRegressor 0.1370352822444405
NARX-RandomForestRegressor 0.16746617689296395
NARX-Catboost 0.1818567377511571
KNeighborsRegressor 1.8710938813353852
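
As a final visual comparison (not part of the original output), a minimal sketch of a bar plot built from the resultados dictionary collected above:

# bar plot of the mean RRSE of each estimator, sorted from best to worst
names = sorted(resultados, key=lambda k: np.mean(resultados[k]))
scores = [np.mean(resultados[k]) for k in names]
plt.figure(figsize=(10, 4))
plt.bar(names, scores)
plt.ylabel('RRSE')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()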