# Source code for sysidentpy.metrics._regression

```"""Common metrics to assess performance on NARX models."""

# Authors:
#           Wilson Rocha Lacerda Junior <wilsonrljr@outlook.com>
#           Luan Pascoal da Costa Andrade <luan_pascoal13@hotmail.com>
#           Samuel Carlos Pessoa Oliveira <samuelcpoliveira@gmail.com>
#           Samir Angelo Milani Martins <martins@ufsj.edu.br>

import numpy as np

# Public API of this module. Note: the dunder must be lowercase ``__all__``
# for Python to honor it in ``from ... import *``; ``__ALL__`` has no effect.
__all__ = [
    "forecast_error",
    "mean_forecast_error",
    "mean_squared_error",
    "root_mean_squared_error",
    "normalized_root_mean_squared_error",
    "root_relative_squared_error",
    "mean_absolute_error",
    "mean_squared_log_error",
    "median_absolute_error",
    "explained_variance_score",
    "r2_score",
    "symmetric_mean_absolute_percentage_error",
]

def forecast_error(y, y_predicted):
    """Compute the forecast error (residuals) of a regression model.

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : ndarray of floats
        Element-wise difference between the true target values and the
        predicted (forecast) values.

    References
    ----------
    - Wikipedia entry on the Forecast error
      https://en.wikipedia.org/wiki/Forecast_error

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> forecast_error(y, y_predicted)
    [0.5, -0.5, 0, -1]

    """
    residuals = y - y_predicted
    return residuals

def mean_forecast_error(y, y_predicted):
    """Compute the mean of the forecast error of a regression model.

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        The mean of the differences between the true target values and
        the predicted (forecast) values.

    References
    ----------
    - Wikipedia entry on the Forecast error
      https://en.wikipedia.org/wiki/Forecast_error

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> mean_forecast_error(y, y_predicted)
    -0.25

    """
    residuals = y - y_predicted
    return np.mean(residuals)

def mean_squared_error(y, y_predicted):
    """Calculate the Mean Squared Error (MSE).

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        MSE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    References
    ----------
    - Wikipedia entry on the Mean Squared Error
      https://en.wikipedia.org/wiki/Mean_squared_error

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> mean_squared_error(y, y_predicted)
    0.375

    """
    squared_residuals = (y - y_predicted) ** 2
    return np.average(squared_residuals)

def root_mean_squared_error(y, y_predicted):
    """Calculate the Root Mean Squared Error (RMSE).

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        RMSE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    References
    ----------
    - Wikipedia entry on the Root Mean Squared Error
      https://en.wikipedia.org/wiki/Root-mean-square_deviation

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> root_mean_squared_error(y, y_predicted)
    0.612

    """
    # Square root of the mean squared residuals (MSE inlined here).
    mse = np.average((y - y_predicted) ** 2)
    return np.sqrt(mse)

def normalized_root_mean_squared_error(y, y_predicted):
    """Calculate the normalized Root Mean Squared Error (nRMSE).

    The RMSE is normalized by the range (max - min) of the target values.

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        nRMSE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    References
    ----------
    - Wikipedia entry on the normalized Root Mean Squared Error
      https://en.wikipedia.org/wiki/Root-mean-square_deviation

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> normalized_root_mean_squared_error(y, y_predicted)
    0.081

    """
    # RMSE inlined, then scaled by the span of the observed target.
    rmse = np.sqrt(np.average((y - y_predicted) ** 2))
    target_span = y.max() - y.min()
    return rmse / target_span

def root_relative_squared_error(y, y_predicted):
    """Calculate the Root Relative Squared Error (RRSE).

    The squared error of the prediction is compared with the spread of the
    predictions around the mean of the true target values.

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        RRSE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    References
    ----------
    - Wikipedia entry on the Root-mean-square deviation
      https://en.wikipedia.org/wiki/Root-mean-square_deviation

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> root_relative_squared_error(y, y_predicted)
    0.206

    """
    numerator = np.sum(np.square((y_predicted - y)))
    denominator = np.sum(np.square((y_predicted - np.mean(y, axis=0))))
    return np.sqrt(np.divide(numerator, denominator))

def mean_absolute_error(y, y_predicted):
    """Calculate the Mean Absolute Error (MAE).

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float or ndarray of floats
        MAE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    References
    ----------
    - Wikipedia entry on the Mean absolute error
      https://en.wikipedia.org/wiki/Mean_absolute_error

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> mean_absolute_error(y, y_predicted)
    0.5

    """
    absolute_residuals = np.abs(y - y_predicted)
    return np.average(absolute_residuals)

def mean_squared_log_error(y, y_predicted):
    """Calculate the Mean Squared Logarithmic Error (MSLE).

    The MSE computed on ``log(1 + x)`` transformed targets and predictions.

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        MSLE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    Examples
    --------
    >>> y = [3, 5, 2.5, 7]
    >>> y_predicted = [2.5, 5, 4, 8]
    >>> mean_squared_log_error(y, y_predicted)
    0.039

    """
    # MSE of the log1p-transformed values (helper call inlined).
    log_y = np.log1p(y)
    log_y_predicted = np.log1p(y_predicted)
    return np.average((log_y - log_y_predicted) ** 2)

def median_absolute_error(y, y_predicted):
    """Calculate the Median Absolute Error (MdAE).

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        MdAE is non-negative. A value of 0.0 means the model outputs
        exactly match the true target values.

    References
    ----------
    - Wikipedia entry on the Median absolute deviation
      https://en.wikipedia.org/wiki/Median_absolute_deviation

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y, y_predicted)
    0.5

    """
    absolute_residuals = np.abs(y - y_predicted)
    return np.median(absolute_residuals)

def explained_variance_score(y, y_predicted):
    """Calculate the Explained Variance Score (EVS).

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        Best possible value is 1.0 (model outputs exactly match the true
        target values). Lower values mean worse results.

    References
    ----------
    - Wikipedia entry on the Explained Variance
      https://en.wikipedia.org/wiki/Explained_variation

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> explained_variance_score(y, y_predicted)
    0.957

    """
    residuals = y - y_predicted
    residuals_avg = np.average(residuals)
    # Variance of the residuals vs. variance of the target values.
    # Note: np.average returns scalars here, so the score is computed with
    # scalar branching (boolean mask indexing on scalars would raise).
    numerator = np.average((residuals - residuals_avg) ** 2)
    denominator = np.average((y - np.average(y)) ** 2)
    if numerator == 0:
        # No residual variance left to explain: perfect score.
        return 1.0
    if denominator == 0:
        # Constant target with nonzero residual variance: set to 0.0 to
        # avoid -inf; a constant y is not interesting to score anyway.
        return 0.0
    return 1 - numerator / denominator

def r2_score(y, y_predicted):
    """Calculate the R2 (coefficient of determination) score.

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        R2 can be non-negative or negative. Best possible value is 1.0
        (model outputs exactly match the true target values). Lower
        values mean worse results.

    Notes
    -----
    This is not a symmetric function.

    References
    ----------
    - Wikipedia entry on the Coefficient of determination
      https://en.wikipedia.org/wiki/Coefficient_of_determination

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> r2_score(y, y_predicted)
    0.949

    """
    # atleast_1d keeps boolean-mask indexing valid for 1-D targets, where
    # the axis-0 sums would otherwise be 0-d scalars.
    numerator = np.atleast_1d(
        ((y - y_predicted) ** 2).sum(axis=0, dtype=np.float64)
    )
    denominator = np.atleast_1d(
        ((y - np.average(y, axis=0)) ** 2).sum(axis=0, dtype=np.float64)
    )
    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator
    # One score per output; defaults to 1.0 where numerator is zero.
    # (np.ones([y.shape]) in the previous version was a TypeError, and the
    # scores must be shaped per-output, not per-sample.)
    output_scores = np.ones(numerator.shape)
    output_scores[valid_score] = 1 - (
        numerator[valid_score] / denominator[valid_score]
    )
    # Arbitrarily set to zero to avoid -inf scores; having a constant
    # y_true is not interesting for scoring a regression anyway.
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
    return np.average(output_scores)

def symmetric_mean_absolute_percentage_error(y, y_predicted):
    """Calculate the Symmetric Mean Absolute Percentage Error (SMAPE).

    Parameters
    ----------
    y : array-like of shape = number_of_outputs
        Represent the target values.
    y_predicted : array-like of shape = number_of_outputs
        Target values predicted by the model.

    Returns
    -------
    loss : float
        SMAPE is a non-negative value expressed as a percentage.

    Notes
    -----
    One supposed problem with SMAPE is that it is not symmetric since
    over-forecasts and under-forecasts are not treated equally.

    References
    ----------
    - Wikipedia entry on the Symmetric mean absolute percentage error
      https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error

    Examples
    --------
    >>> y = [3, -0.5, 2, 7]
    >>> y_predicted = [2.5, 0.0, 2, 8]
    >>> symmetric_mean_absolute_percentage_error(y, y_predicted)
    57.87

    """
    absolute_errors = np.abs(y_predicted - y)
    magnitude_sums = np.abs(y) + np.abs(y_predicted)
    return 100 / len(y) * np.sum(2 * absolute_errors / magnitude_sums)