V0.1.6 - Identification of an electromechanical system

Example created by Wilson Rocha Lacerda Junior

More details about this data can be found in the following paper (in Portuguese): https://www.researchgate.net/publication/320418710_Identificacao_de_um_motorgerador_CC_por_meio_de_modelos_polinomiais_autorregressivos_e_redes_neurais_artificiais

pip install sysidentpy
import pandas as pd
import numpy as np
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sysidentpy.polynomial_basis import PolynomialNarmax
from sysidentpy.metrics import root_relative_squared_error
from sysidentpy.utils.generate_data import get_miso_data, get_siso_data
df1 = pd.read_csv('data/x_cc.csv')
df2 = pd.read_csv('data/y_cc.csv')
df2[5000:80000].plot(figsize=(10, 4))
<AxesSubplot:>
../_images/identification_of_an_electromechanical_system_4_1.png
# we will decimate the data using d=500 in this example
x_train, x_valid = np.split(df1.iloc[::500].values, 2)
y_train, y_valid = np.split(df2.iloc[::500].values, 2)

Building a Polynomial NARX model

model = PolynomialNarmax(non_degree=2,
                         order_selection=True,
                         n_info_values=40,
                         extended_least_squares=False,
                         ylag=2, xlag=2,
                         info_criteria='bic',
                         estimator='recursive_least_squares'
                         )
model.fit(x_train, y_train)
yhat = model.predict(x_valid, y_valid)
rrse = root_relative_squared_error(y_valid, yhat)
print(rrse)
results = pd.DataFrame(model.results(err_precision=8,
                                     dtype='dec'),
                       columns=['Regressors', 'Parameters', 'ERR'])

print(results)

ee, ex, extras, lam = model.residuals(x_valid, y_valid, yhat)
model.plot_result(y_valid, yhat, ee, ex, n=300)
C:\Users\wilso\miniconda3\envs\v0.1.4\lib\site-packages\sysidentpy\polynomial_basis\narmax.py:333: UserWarning: n_info_values is greater than the maximum number of all regressors space considering the chosen y_lag, u_lag, and non_degree. We set as 15 
  self.info_values = self.information_criterion(X, y)
0.08011571455094049
       Regressors Parameters         ERR
0          y(k-1)     1.3016  0.98600038
1       x1(k-1)^2   103.9300  0.00794805
2        y(k-2)^2     0.0000  0.00250906
3   x1(k-1)y(k-1)    -0.1257  0.00143301
4          y(k-2)    -0.5078  0.00102781
5   x1(k-1)y(k-2)     0.0560  0.00053520
6         x1(k-2)   349.8627  0.00027965
7   x1(k-2)y(k-1)    -0.0840  0.00011221
8  x1(k-2)x1(k-1)    -7.8186  0.00004547
9   x1(k-2)y(k-2)     0.0341  0.00003253
../_images/identification_of_an_electromechanical_system_8_2.png

Testing different autoregressive models

from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostRegressor
from sklearn.linear_model import BayesianRidge, ARDRegression
from sysidentpy.general_estimators import NARX


estimators = [
    ('KNeighborsRegressor', NARX(
        base_estimator=KNeighborsRegressor(),
        xlag=10,
        ylag=10)),
    ('DecisionTreeRegressor', NARX(
        base_estimator=DecisionTreeRegressor(),
        xlag=10,
        ylag=10)),
    ('RandomForestRegressor', NARX(
        base_estimator=RandomForestRegressor(
            n_estimators=200),
        xlag=10,
        ylag=10,
    )),
    ('Catboost_NARX', NARX(
        base_estimator=CatBoostRegressor(
        iterations=800,
        learning_rate=0.1,
        depth=8),
        xlag=10,
        ylag=10,
        non_degree=1,
        fit_params={'verbose': False}
    )),
    ('ARD_NARX', NARX(
        base_estimator=ARDRegression(),
        xlag=10,
        ylag=10,
        non_degree=2
    )),
    ('Polynomial_NARX', PolynomialNarmax(non_degree=2,
                         order_selection=True,
                         n_info_values=50,
                         extended_least_squares=False,
                         ylag=10, xlag=10,
                         info_criteria='bic',
                         estimator='recursive_least_squares',
                         )),
    
    ]




resultados = {}
for nome_do_modelo, modelo in estimators:
    resultados['%s' % (nome_do_modelo)] = []
    modelo.fit(x_train, y_train)
    yhat = modelo.predict(x_valid,  y_valid)
    result = root_relative_squared_error(y_valid, yhat)
    resultados['%s' % (nome_do_modelo)].append(result)
    print(nome_do_modelo, '%.3f' % np.mean(result))
03-08 22:53:49 - INFO - Training the model
03-08 22:53:49 - INFO - Creating the regressor matrix
03-08 22:53:49 - INFO - The regressor matrix have 21 features
03-08 22:53:49 - INFO - Done! Model is built!
KNeighborsRegressor 1.833
03-08 22:53:50 - INFO - Training the model
03-08 22:53:50 - INFO - Creating the regressor matrix
03-08 22:53:50 - INFO - The regressor matrix have 21 features
03-08 22:53:50 - INFO - Done! Model is built!
DecisionTreeRegressor 0.346
03-08 22:53:50 - INFO - Training the model
03-08 22:53:50 - INFO - Creating the regressor matrix
03-08 22:53:50 - INFO - The regressor matrix have 21 features
C:\Users\wilso\miniconda3\envs\v0.1.4\lib\site-packages\sysidentpy\general_estimators\narx.py:154: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  self.base_estimator.fit(X, y, **self.fit_params)
03-08 22:53:51 - INFO - Done! Model is built!
RandomForestRegressor 0.178
03-08 22:54:03 - INFO - Training the model
03-08 22:54:03 - INFO - Creating the regressor matrix
03-08 22:54:03 - INFO - The regressor matrix have 21 features
03-08 22:54:12 - INFO - Done! Model is built!
Catboost_NARX 0.179
03-08 22:54:13 - INFO - Training the model
03-08 22:54:13 - INFO - Creating the regressor matrix
03-08 22:54:13 - INFO - The regressor matrix have 231 features
C:\Users\wilso\miniconda3\envs\v0.1.4\lib\site-packages\sklearn\utils\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)
03-08 22:54:14 - INFO - Done! Model is built!
ARD_NARX 0.074
Polynomial_NARX 0.032
for aux_results, results in sorted(resultados.items(), key=lambda x: np.mean(x[1]), reverse=False):
    print(aux_results, np.mean(results))
Polynomial_NARX 0.03208119098997185
ARD_NARX 0.07364659571266981
RandomForestRegressor 0.17752025859574144
Catboost_NARX 0.17923040407121107
DecisionTreeRegressor 0.3457206837080308
KNeighborsRegressor 1.833370478725381