Source code for sysidentpy.utils._check_arrays

""" Utilities for data validation """
# Authors:
#           Wilson Rocha Lacerda Junior <wilsonrljr@outlook.com>
# License: BSD 3 clause

import numpy as np


def check_infinity(X, y):
    """Ensure that neither X nor y holds an infinite sample.

    Only +/-Inf is tested here (via ``np.isinf``); NaN values are not
    detected by this function. X is examined first, so when both arrays
    are invalid the error refers to the input.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    Raises
    ------
    ValueError
        If X or y contains at least one infinite value. The message
        lists the offending positions as returned by ``np.argwhere``.

    """
    inf_in_input = np.isinf(X)
    if inf_in_input.any():
        raise ValueError(
            "Input contains invalid values (e.g. NaN, Inf) on "
            f"index {np.argwhere(inf_in_input)}"
        )

    inf_in_output = np.isinf(y)
    if inf_in_output.any():
        raise ValueError(
            "Output contains invalid values (e.g Inf) on "
            f"index {np.argwhere(inf_in_output)}"
        )
def check_nan(X, y):
    """Check that X and y have no NaN samples.

    Only NaN is tested here (via ``np.isnan``); infinite values are not
    detected by this function. X is examined first, so when both arrays
    contain NaN the error refers to the input.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    Raises
    ------
    ValueError
        If X or y contains at least one NaN. The message lists the
        offending positions as returned by ``np.argwhere``.

    """
    nan_in_input = np.isnan(X)
    if nan_in_input.any():
        msg_error = (
            "Input contains invalid values (e.g. NaN, Inf) on "
            f"index {np.argwhere(nan_in_input)}"
        )
        raise ValueError(msg_error)

    nan_in_output = np.isnan(y)
    # Direct truth test. The former ``not ~mask.any()`` double negation was
    # only correct because ``~`` on a NumPy boolean scalar is a logical NOT;
    # on a plain Python bool ``~`` is integer inversion (always truthy), which
    # would make the check silently never fire.
    if nan_in_output.any():
        msg_error = (
            "Output contains invalid values (e.g. NaN, Inf) on "
            f"index {np.argwhere(nan_in_output)}"
        )
        raise ValueError(msg_error)
def check_length(X, y):
    """Verify that X and y hold the same number of samples.

    The number of samples is taken as the size of the first axis
    (``shape[0]``) of each array.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    Raises
    ------
    ValueError
        If the first-axis sizes of X and y differ.

    """
    if X.shape[0] == y.shape[0]:
        return

    raise ValueError(
        "Input and output data must have the same number of "
        f"samples. X has dimension {X.shape} and "
        f"y has dimension {y.shape}"
    )
def check_dimension(X, y):
    """Check that neither X nor y is a scalar or a 1d array.

    X is validated before y, so when both are malformed the error
    refers to the input. Arrays with more than two dimensions are not
    rejected by this check.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    Raises
    ------
    ValueError
        If X or y is 0-dimensional (scalar) or 1-dimensional. The
        message suggests ``array.reshape(-1, 1)`` as the remedy.

    """
    _reject_scalar_or_1d(X, "Input")
    _reject_scalar_or_1d(y, "Output")


def _reject_scalar_or_1d(array, label):
    """Raise ValueError if ``array.ndim`` is 0 or 1; ``label`` starts the message."""
    if array.ndim == 0:
        got = "scalar"
    elif array.ndim == 1:
        got = "1d array"
    else:
        return

    raise ValueError(
        f"{label} must be a 2d array, got {got} instead.\n"
        "Reshape your data using array.reshape(-1, 1)"
    )
def check_X_y(X, y):
    """Validate input and output data using some crucial tests.

    The checks run in this order: dimension, length, infinity, NaN.
    The first failing check raises and the remaining ones are skipped.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    Raises
    ------
    ValueError
        Propagated from whichever individual check fails first.

    """
    # Dimension goes first: check_length indexes ``shape[0]``, which raises a
    # bare IndexError on a 0-d array and would hide the more helpful
    # "got scalar" ValueError produced by check_dimension.
    check_dimension(X, y)
    check_length(X, y)
    check_infinity(X, y)
    check_nan(X, y)