Documentation for Neural NARX

Utilities for data validation

check_X_y(X, y)

Validate input and output data using a set of crucial checks: matching length, 2d shape, and the absence of Inf and NaN values.

Parameters:

    X : ndarray of floats, required
        The input data.
    y : ndarray of floats, required
        The output data.
Source code in sysidentpy\utils\_check_arrays.py
def check_X_y(X, y):
    """Validate input and output data using some crucial tests.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    """
    check_length(X, y)
    check_dimension(X, y)
    check_infinity(X, y)
    check_nan(X, y)
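
A minimal usage sketch (the import path follows the source location shown above):

import numpy as np
from sysidentpy.utils._check_arrays import check_X_y

X = np.random.uniform(-1, 1, 100).reshape(-1, 1)
y = np.random.uniform(-1, 1, 100).reshape(-1, 1)
check_X_y(X, y)  # passes: same length, both 2d, no Inf, no NaN

y_bad = y.copy()
y_bad[10] = np.nan
try:
    check_X_y(X, y_bad)
except ValueError as e:
    print(e)  # reports the indices of the offending samples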

check_dimension(X, y)

Check that X and y are 2-dimensional arrays.

If X or y is a scalar or a 1-dimensional array, a ValueError is raised.

Parameters:

    X : ndarray of floats, required
        The input data.
    y : ndarray of floats, required
        The output data.
Source code in sysidentpy\utils\_check_arrays.py
def check_dimension(X, y):
    """Check if X and y have only real values.

    If there is any string or object samples a ValueError is raised.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    """
    if X.ndim == 0:
        raise ValueError(
            "Input must be a 2d array, got scalar instead. Reshape your data using"
            " array.reshape(-1, 1)"
        )

    if X.ndim == 1:
        raise ValueError(
            "Input must be a 2d array, got 1d array instead. "
            "Reshape your data using array.reshape(-1, 1)"
        )

    if y.ndim == 0:
        raise ValueError(
            "Output must be a 2d array, got scalar instead. "
            "Reshape your data using array.reshape(-1, 1)"
        )

    if y.ndim == 1:
        raise ValueError(
            "Output must be a 2d array, got 1d array instead. "
            "Reshape your data using array.reshape(-1, 1)"
        )
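
A short sketch of the failure mode and the reshape fix it suggests:

import numpy as np
from sysidentpy.utils._check_arrays import check_dimension

x = np.arange(10.0)                    # 1d array: rejected
y = np.arange(10.0).reshape(-1, 1)     # 2d column vector: accepted
try:
    check_dimension(x, y)
except ValueError as e:
    print(e)  # "Input must be a 2d array, got 1d array instead. ..."

check_dimension(x.reshape(-1, 1), y)   # passes after reshaping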

check_infinity(X, y)

Check that X and y have no Inf samples.

If any sample is Inf, a ValueError is raised.

Parameters:

    X : ndarray of floats, required
        The input data.
    y : ndarray of floats, required
        The output data.
Source code in sysidentpy\utils\_check_arrays.py
def check_infinity(X, y):
    """Check that X and y have no NaN or Inf samples.

    If there is any NaN or Inf samples a ValueError is raised.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    """
    if np.isinf(X).any():
        msg_error = (
            "Input contains infinite values (Inf) on "
            f"index {np.argwhere(np.isinf(X))}"
        )
        raise ValueError(msg_error)

    if np.isinf(y).any():
        msg_error = (
            "Output contains infinite values (Inf) on "
            f"index {np.argwhere(np.isinf(y))}"
        )
        raise ValueError(msg_error)
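
A short sketch; np.argwhere in the error message pinpoints the offending entries:

import numpy as np
from sysidentpy.utils._check_arrays import check_infinity

X = np.ones((5, 1))
y = np.ones((5, 1))
y[2] = np.inf
try:
    check_infinity(X, y)
except ValueError as e:
    print(e)  # "Output contains infinite values (Inf) on index [[2 0]]"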

check_length(X, y)

Check that X and y have the same number of samples.

If the length of X and y are different a ValueError is raised.

Parameters:

    X : ndarray of floats, required
        The input data.
    y : ndarray of floats, required
        The output data.
Source code in sysidentpy\utils\_check_arrays.py
def check_length(X, y):
    """Check that X and y have the same number of samples.

    If the length of X and y are different a ValueError is raised.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    """
    if X.shape[0] != y.shape[0]:
        msg_error = (
            "Input and output data must have the same number of "
            f"samples. X has dimension {X.shape} and "
            f"y has dimension {y.shape}"
        )
        raise ValueError(msg_error)
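
A quick sketch of the mismatch this check guards against:

import numpy as np
from sysidentpy.utils._check_arrays import check_length

X = np.zeros((100, 1))
y = np.zeros((90, 1))
try:
    check_length(X, y)
except ValueError as e:
    print(e)  # X has dimension (100, 1) and y has dimension (90, 1)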

check_nan(X, y)

Check that X and y have no NaN samples.

If any sample is NaN, a ValueError is raised.

Parameters:

    X : ndarray of floats, required
        The input data.
    y : ndarray of floats, required
        The output data.
Source code in sysidentpy\utils\_check_arrays.py
def check_nan(X, y):
    """Check that X and y have no NaN or Inf samples.

    If there is any NaN or Inf samples a ValueError is raised.

    Parameters
    ----------
    X : ndarray of floats
        The input data.
    y : ndarray of floats
        The output data.

    """
    if np.isnan(X).any():
        msg_error = (
            "Input contains NaN values on "
            f"index {np.argwhere(np.isnan(X))}"
        )
        raise ValueError(msg_error)

    if np.isnan(y).any():
        msg_error = (
            "Output contains NaN values on "
            f"index {np.argwhere(np.isnan(y))}"
        )
        raise ValueError(msg_error)

check_random_state(seed)

Turn seed into a numpy random number generator instance.

Parameters:

    seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
        If seed is None (or np.random), the numpy.random.RandomState
        singleton is used. If seed is an int, a new Generator instance is
        used, seeded with seed. If seed is already a Generator or
        RandomState instance, that instance is used.

Returns:

    seed : {`numpy.random.Generator`, `numpy.random.RandomState`}
        Random number generator.

Source code in sysidentpy\utils\_check_arrays.py
def check_random_state(seed):
    """Turn `seed` into a `np.random.RandomState` instance.

    Parameters
    ----------
    seed : {None, int, `numpy.random.Generator`,
            `numpy.random.RandomState`}, optional
        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``Generator`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.

    Returns
    -------
    seed : {`numpy.random.Generator`, `numpy.random.RandomState`}
        Random number generator.

    """
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, (numbers.Integral, np.integer)):
        return np.random.default_rng(seed)
    if isinstance(seed, (np.random.RandomState, np.random.Generator)):
        return seed

    raise ValueError(
        "%r cannot be used to seed a numpy.random.RandomState instance" % seed
    )
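
A short sketch covering the three accepted seed types:

import numpy as np
from sysidentpy.utils._check_arrays import check_random_state

rng = check_random_state(42)        # int -> new Generator seeded with 42
print(rng.uniform(-1, 1, 3))

assert check_random_state(rng) is rng   # an existing generator is passed through
singleton = check_random_state(None)    # None -> global RandomState singleton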

results(final_model=None, theta=None, err=None, n_terms=None, theta_precision=4, err_precision=8, dtype='dec')

Write the model regressors, parameters and ERR values.

This function returns the model regressors, their respective parameters, and ERR values as a matrix of string rows.

Parameters:

    final_model : ndarray
        The encoded regressors of the model (one row per model term).
    theta : ndarray of floats
        The estimated parameters, one per model term.
    err : ndarray of floats
        The error reduction ratio (ERR) value of each model term.
    n_terms : int
        The number of model terms to display.
    theta_precision : int, default: 4
        Precision of the displayed parameter values.
    err_precision : int, default: 8
        Precision of the displayed ERR values.
    dtype : string, default: 'dec'
        Type of representation: 'sci' for scientific notation;
        'dec' for decimal notation.

Returns:

    output_matrix : list of lists of strings
        The first column contains each regressor; the second column the
        associated parameter; the third column the error reduction ratio
        (ERR) associated with each regressor.

Source code in sysidentpy\utils\display_results.py
def results(
    final_model=None,
    theta=None,
    err=None,
    n_terms=None,
    theta_precision=4,
    err_precision=8,
    dtype="dec",
):
    """Write the model regressors, parameters and ERR values.

    This function returns the model regressors, their respective parameters,
    and ERR values as a matrix of string rows.

    Parameters
    ----------
    final_model : ndarray
        The encoded regressors of the model (one row per model term).
    theta : ndarray of floats
        The estimated parameters, one per model term.
    err : ndarray of floats
        The error reduction ratio (ERR) value of each model term.
    n_terms : int
        The number of model terms to display.
    theta_precision : int (default: 4)
        Precision of the displayed parameter values.
    err_precision : int (default: 8)
        Precision of the displayed ERR values.
    dtype : string (default: 'dec')
        Type of representation:
        sci - Scientific notation;
        dec - Decimal notation.

    Returns
    -------
    output_matrix : list of lists of strings
        The first column contains each regressor; the second column the
        associated parameter; the third column the error reduction ratio
        (ERR) associated with each regressor.

    """
    if not isinstance(theta_precision, int) or theta_precision < 1:
        raise ValueError(
            "theta_precision must be integer and > zero. Got %f" % theta_precision
        )

    if not isinstance(err_precision, int) or err_precision < 1:
        raise ValueError(
            "err_precision must be integer and > zero. Got %f" % err_precision
        )

    if dtype not in ("dec", "sci"):
        raise ValueError("dtype must be dec or sci. Got %s" % dtype)

    output_matrix = []
    theta_output_format = "{:." + str(theta_precision)
    err_output_format = "{:." + str(err_precision)

    if dtype == "dec":
        theta_output_format = theta_output_format + "f}"
        err_output_format = err_output_format + "f}"
    else:
        theta_output_format = theta_output_format + "E}"
        err_output_format = err_output_format + "E}"

    for i in range(0, n_terms):
        if np.max(final_model[i]) < 1:
            tmp_regressor = str(1)
        else:
            regressor_dic = Counter(final_model[i])
            regressor_string = []
            for j in range(0, len(list(regressor_dic.keys()))):
                regressor_key = list(regressor_dic.keys())[j]
                if regressor_key < 1:
                    translated_key = ""
                    translated_exponent = ""
                else:
                    delay_string = str(
                        int(regressor_key - np.floor(regressor_key / 1000) * 1000)
                    )
                    if int(regressor_key / 1000) < 2:
                        translated_key = "y(k-" + delay_string + ")"
                    else:
                        translated_key = (
                            "x"
                            + str(int(regressor_key / 1000) - 1)
                            + "(k-"
                            + delay_string
                            + ")"
                        )
                    if regressor_dic[regressor_key] < 2:
                        translated_exponent = ""
                    else:
                        translated_exponent = "^" + str(regressor_dic[regressor_key])
                regressor_string.append(translated_key + translated_exponent)
            tmp_regressor = "".join(regressor_string)

        current_parameter = theta_output_format.format(theta[i, 0])
        current_err = err_output_format.format(err[i])
        current_output = [tmp_regressor, current_parameter, current_err]
        output_matrix.append(current_output)

    return output_matrix
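
The regressor encoding can be read off the source above: each code is an integer whose thousands part selects the signal (1 for y, 2 for x1, 3 for x2, and so on) and whose remainder is the lag; zeros pad product terms, and an all-zero row denotes the constant term. A sketch with a hypothetical 3-term model:

import numpy as np
from sysidentpy.utils.display_results import results

# Hypothetical model: y(k-1), x1(k-2) and the cross term y(k-1)x1(k-1)
final_model = np.array([[1001, 0], [2002, 0], [1001, 2001]])
theta = np.array([[0.2], [0.9], [0.1]])
err = np.array([0.95, 0.04, 0.009])

for row in results(final_model=final_model, theta=theta, err=err, n_terms=3):
    print(row)
# ['y(k-1)', '0.2000', '0.95000000']
# ['x1(k-2)', '0.9000', '0.04000000']
# ['y(k-1)x1(k-1)', '0.1000', '0.00900000']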

Utilities for data generation

get_miso_data(n=5000, colored_noise=False, sigma=0.05, train_percentage=90)

Generate synthetic data from a simulated MISO (multiple-input, single-output) system.

Parameters:

    n : int, default: 5000
        The number of samples.
    colored_noise : bool, default: False
        Select white noise or colored noise (autoregressive noise).
    sigma : float, default: 0.05
        The standard deviation of the random distribution used to generate
        the noise.
    train_percentage : int, default: 90
        The percentage of the data to be used as training data.

Returns:

    x_train, x_valid : array-like
        The input data to be used in identification and validation,
        respectively.
    y_train, y_valid : array-like
        The output data to be used in identification and validation,
        respectively.

Source code in sysidentpy\utils\generate_data.py
def get_miso_data(n=5000, colored_noise=False, sigma=0.05, train_percentage=90):
    """Perform the Error Reduction Ration algorithm.

    Parameters
    ----------
    n : int
        The number of samples.
    colored_noise : bool
        Select white noise or colored noise (autoregressive noise).
    sigma : float
        The standard deviation of the random distribution to generate
        the noise.
    train_percentage : int
        The percentage of the data to be used as train data.

    Returns
    -------
    x_train, x_valid : array-like
        The input data to be used in identification and validation,
        respectively.
    y_train, y_valid : array-like
        The output data to be used in identification and validation,
        respectively.

    """
    mu = 0  # mean of the distribution
    nu = np.random.normal(mu, sigma, n).T
    e = np.zeros((n, 1))

    lag = 2
    if colored_noise is True:
        for k in range(lag, len(e)):
            e[k] = 0.8 * nu[k - 1] + nu[k]
    else:
        e = nu

    x1 = np.random.uniform(-1, 1, n).T
    x2 = np.random.uniform(-1, 1, n).T
    y = np.zeros((n, 1))
    theta = np.array([[0.4], [0.1], [0.6], [-0.3]])

    lag = 2
    for k in range(lag, len(e)):
        y[k] = (
            theta[0] * y[k - 1] ** 2
            + theta[1] * y[k - 1] * x1[k - 1]
            + theta[2] * x2[k - 1]
            + theta[3] * x1[k - 1] * x2[k - 2]
            + e[k]
        )

    split_data = int(len(x1) * (train_percentage / 100))
    x1_train = x1[0:split_data].reshape(-1, 1)
    x2_train = x2[0:split_data].reshape(-1, 1)
    x1_valid = x1[split_data::].reshape(-1, 1)
    x2_valid = x2[split_data::].reshape(-1, 1)

    x_train = np.hstack([x1_train, x2_train])
    x_valid = np.hstack([x1_valid, x2_valid])

    y_train = y[0:split_data].reshape(-1, 1)
    y_valid = y[split_data::].reshape(-1, 1)

    return x_train, x_valid, y_train, y_valid
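
A minimal usage sketch showing the shapes of the returned split:

from sysidentpy.utils.generate_data import get_miso_data

x_train, x_valid, y_train, y_valid = get_miso_data(
    n=1000, colored_noise=False, sigma=0.05, train_percentage=90
)
print(x_train.shape, y_train.shape)  # (900, 2) (900, 1) -- two input columns
print(x_valid.shape, y_valid.shape)  # (100, 2) (100, 1)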

get_siso_data(n=5000, colored_noise=False, sigma=0.05, train_percentage=90)

Generate synthetic data from a simulated SISO (single-input, single-output) system.

Parameters:

    n : int, default: 5000
        The number of samples.
    colored_noise : bool, default: False
        Select white noise or colored noise (autoregressive noise).
    sigma : float, default: 0.05
        The standard deviation of the random distribution used to generate
        the noise.
    train_percentage : int, default: 90
        The percentage of the data to be used as training data.

Returns:

    x_train, x_valid : array-like
        The input data to be used in identification and validation,
        respectively.
    y_train, y_valid : array-like
        The output data to be used in identification and validation,
        respectively.

Source code in sysidentpy\utils\generate_data.py
def get_siso_data(n=5000, colored_noise=False, sigma=0.05, train_percentage=90):
    """Perform the Error Reduction Ration algorithm.

    Parameters
    ----------
    n : int
        The number of samples.
    colored_noise : bool
        Select white noise or colored noise (autoregressive noise).
    sigma : float
        The standard deviation of the random distribution to generate
        the noise.
    train_percentage : int
        The percentage of the data to be used as train data.

    Returns
    -------
    x_train, x_valid : array-like
        The input data to be used in identification and validation,
        respectively.
    y_train, y_valid : array-like
        The output data to be used in identification and validation,
        respectively.

    """
    mu = 0  # mean of the distribution
    nu = np.random.normal(mu, sigma, n).T
    e = np.zeros((n, 1))

    lag = 2
    if colored_noise is True:
        for k in range(lag, len(e)):
            e[k] = 0.8 * nu[k - 1] + nu[k]
    else:
        e = nu

    x = np.random.uniform(-1, 1, n).T
    y = np.zeros((n, 1))
    theta = np.array([[0.2], [0.1], [0.9]])
    lag = 2
    for k in range(lag, len(x)):
        y[k] = (
            theta[0] * y[k - 1]
            + theta[1] * y[k - 1] * x[k - 1]
            + theta[2] * x[k - 2]
            + e[k]
        )

    split_data = int(len(x) * (train_percentage / 100))

    x_train = x[0:split_data].reshape(-1, 1)
    x_valid = x[split_data::].reshape(-1, 1)

    y_train = y[0:split_data].reshape(-1, 1)
    y_valid = y[split_data::].reshape(-1, 1)

    return x_train, x_valid, y_train, y_valid
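
The SISO counterpart returns single-column inputs; a short sketch with autoregressive noise enabled:

from sysidentpy.utils.generate_data import get_siso_data

x_train, x_valid, y_train, y_valid = get_siso_data(n=1000, colored_noise=True)
print(x_train.shape, x_valid.shape)  # (900, 1) (100, 1)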

Utility methods for NARMAX modeling

set_weights(*, static_function=True, static_gain=True, start=-0.01, stop=-5, num=50, base=2.71)

Set log-spaced weights assigned to each objective in the multi-objective optimization.

Returns:

    weights : ndarray of floats
        An array containing the weights for each objective.

Notes

This method calculates the weights to be assigned to different objectives in multi-objective optimization. The choice of weights depends on the presence of static function and static gain data. If both are present, a set of weights for dynamic, gain, and static objectives is computed. If either static function or static gain is absent, a simplified set of weights is generated.

Source code in sysidentpy\utils\narmax_tools.py
def set_weights(
    *,
    static_function: bool = True,
    static_gain: bool = True,
    start: float = -0.01,
    stop: float = -5,
    num: int = 50,
    base: float = 2.71,
) -> np.ndarray:
    """
    Set log-spaced weights assigned to each objective in the multi-objective
    optimization.

    Returns
    -------
    weights : ndarray of floats
        An array containing the weights for each objective.

    Notes
    -----
    This method calculates the weights to be assigned to different objectives in
    multi-objective optimization. The choice of weights depends on the presence
    of static function and static gain data. If both are present, a set of weights
    for dynamic, gain, and static objectives is computed. If either static function
    or static gain is absent, a simplified set of weights is generated.

    """
    w1 = np.logspace(start=start, stop=stop, num=num, base=base)
    if static_function is False or static_gain is False:
        w2 = 1 - w1
        return np.vstack([w1, w2])

    w2 = w1[::-1]
    w1_grid, w2_grid = np.meshgrid(w1, w2)
    w3_grid = 1 - (w1_grid + w2_grid)
    mask = w1_grid + w2_grid <= 1
    dynamic_weight = np.flip(w1_grid[mask])
    gain_weight = np.flip(w2_grid[mask])
    static_weight = np.flip(w3_grid[mask])
    return np.vstack([dynamic_weight, gain_weight, static_weight])
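
A short sketch of the two possible output shapes:

import numpy as np
from sysidentpy.utils.narmax_tools import set_weights

# Three objectives (dynamic, gain, static): one row of weights per objective
w3 = set_weights(static_function=True, static_gain=True)
print(w3.shape[0])                     # 3
print(np.allclose(w3.sum(axis=0), 1))  # True: each column sums to 1

# Two objectives when one kind of static data is absent
w2 = set_weights(static_gain=False)
print(w2.shape)                        # (2, 50) with the default num=50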

Plotting methods

plot_residues_correlation(data=None, *, figsize=(10, 6), n=100, style='default', facecolor='white', title='Residual Analysis', ylabel='Correlation')

Plot residual correlations for model validation.

Source code in sysidentpy\utils\plotting.py
def plot_residues_correlation(
    data=None,
    *,
    figsize: Tuple[int, int] = (10, 6),
    n: int = 100,
    style: str = "default",
    facecolor: str = "white",
    title: str = "Residual Analysis",
    ylabel: str = "Correlation",
) -> None:
    """Plot the residual validation."""
    plt.style.use(style)
    plt.rcParams["axes.facecolor"] = facecolor
    _, ax = plt.subplots(figsize=figsize, facecolor=facecolor)
    ax.plot(data[0][:n], color="#1f77b4")
    ax.axhspan(data[1], data[2], color="#ccd9ff", alpha=0.5, lw=0)
    ax.set_xlabel("Lag", fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.tick_params(labelsize=14)
    ax.set_ylim([-1, 1])
    ax.set_title(title, fontsize=18)
    plt.show()
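
The function reads data[0] as the correlation values and data[1], data[2] as the confidence band drawn with axhspan. A self-contained sketch with white-noise residuals; the ±1.96/√N bounds are the usual white-noise limits, supplied by hand here since the function does not compute them:

import numpy as np
from sysidentpy.utils.plotting import plot_residues_correlation

residues = np.random.normal(0, 1, 500)
acorr = np.correlate(residues, residues, mode="full")
acorr = acorr[acorr.size // 2 :] / acorr[acorr.size // 2]  # normalized, lags >= 0
bound = 1.96 / np.sqrt(len(residues))  # 95% white-noise confidence bound
plot_residues_correlation(data=[acorr, -bound, bound])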

plot_results(y, *, yhat, n=100, title='Free run simulation', xlabel='Samples', ylabel='y, $\\hat{y}$', data_color='#1f77b4', model_color='#ff7f0e', marker='o', model_marker='*', linewidth=1.5, figsize=(10, 6), style='default', facecolor='white')

Plot the results of a simulation.

Parameters:

    y : np.ndarray
        True data values.
    yhat : np.ndarray
        Model predictions.
    n : int
        Number of samples to plot.
    title : str
        Plot title.
    xlabel : str
        Label for the x-axis.
    ylabel : str
        Label for the y-axis.
    data_color : str
        Color for the data line.
    model_color : str
        Color for the model line.
    marker : str
        Marker style for the data line.
    model_marker : str
        Marker style for the model line.
    linewidth : float
        Line width for both lines.
    figsize : Tuple[int, int]
        Figure size (width, height).
    style : str
        Matplotlib style.
    facecolor : str
        Figure facecolor.

Returns:

    None
Source code in sysidentpy\utils\plotting.py
def plot_results(
    y: np.ndarray,
    *,
    yhat: np.ndarray,
    n: int = 100,
    title: str = "Free run simulation",
    xlabel: str = "Samples",
    ylabel: str = r"y, $\hat{y}$",
    data_color: str = "#1f77b4",
    model_color: str = "#ff7f0e",
    marker: str = "o",
    model_marker: str = "*",
    linewidth: float = 1.5,
    figsize: Tuple[int, int] = (10, 6),
    style: str = "default",
    facecolor: str = "white",
) -> None:
    """Plot the results of a simulation.

    Parameters
    ----------
    y : np.ndarray
        True data values.
    yhat : np.ndarray
        Model predictions.
    n : int
        Number of samples to plot.
    title : str
        Plot title.
    xlabel : str
        Label for the x-axis.
    ylabel : str
        Label for the y-axis.
    data_color : str
        Color for the data line.
    model_color : str
        Color for the model line.
    marker : str
        Marker style for the data line.
    model_marker : str
        Marker style for the model line.
    linewidth : float
        Line width for both lines.
    figsize : Tuple[int, int]
        Figure size (width, height).
    style : str
        Matplotlib style.
    facecolor : str
        Figure facecolor.

    Returns
    -------
    None

    """
    assert len(y) >= 1 and len(yhat) >= 1, "Arrays must have at least 1 sample."

    # Set Matplotlib style and figure properties
    plt.style.use(style)
    plt.rcParams["axes.facecolor"] = facecolor

    _, ax = plt.subplots(figsize=figsize, facecolor=facecolor)
    ax.plot(
        y[:n], c=data_color, alpha=1, marker=marker, label="Data", linewidth=linewidth
    )
    ax.plot(
        yhat[:n], c=model_color, marker=model_marker, label="Model", linewidth=linewidth
    )

    # Customize plot properties
    ax.set_title(title, fontsize=18)
    ax.legend()
    ax.tick_params(labelsize=14)
    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    plt.show()
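
A usage sketch; the predictions here are stand-ins (true values plus small noise) so the example runs without fitting a model:

import numpy as np
from sysidentpy.utils.generate_data import get_siso_data
from sysidentpy.utils.plotting import plot_results

_, _, _, y_valid = get_siso_data(n=1000)
yhat = y_valid + np.random.normal(0, 0.02, y_valid.shape)  # stand-in predictions
plot_results(y=y_valid, yhat=yhat, n=100)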

load_model(*, file_name='model', path=None)

Load a model from the file "file_name.syspy" located at "path".

Parameters:

    file_name : str, default: 'model'
        The file name, including the .syspy extension, of the file
        containing the model to be loaded.
    path : str, optional, default: None
        The location of "file_name.syspy".

Returns:

    model_loaded
        The loaded model object, with all of its attributes.
Source code in sysidentpy\utils\save_load.py
def load_model(
    *,
    file_name="model",
    path=None,
):
    """Load a model from the file "file_name.syspy" located at "path".

    Parameters
    ----------
    file_name : str
        The file name, including the .syspy extension, of the file
        containing the model to be loaded.
    path : str, optional
        The location of "file_name.syspy".

    Returns
    -------
    model_loaded
        The loaded model object, with all of its attributes.

    """
    # If a path is provided, compose the full file name with it
    if path is not None:
        file_name = os.path.join(path, file_name)

    # Loading the model
    with open(file_name, "rb") as fp:
        model_loaded = pk.load(fp)

    return model_loaded

save_model(*, model=None, file_name='model', path=None)

Save the model to the file "file_name.syspy" located at "path".

Parameters:

    model : the model to be saved, default: None
        Required in practice; a TypeError is raised if model is None.
    file_name : str, default: 'model'
        The file name, along with the .syspy extension.
    path : str, optional, default: None
        The location where the model will be saved.

Returns:

    file : file_name.syspy
        A file located at "path" containing the estimated model.
Source code in sysidentpy\utils\save_load.py
def save_model(
    *,
    model=None,
    file_name="model",
    path=None,
):
    """This method saves the model "model" in folder "folder" using an extension .syspy

    Parameters
    ----------
    model: the model variable to be saved
    file_name: file name, along with .syspy extension
    path: location where the model will be saved (optional)

    Returns
    ----------
    file file_name.syspy located at "path", containing the estimated model.

    """

    if model is None:
        raise TypeError("model cannot be None.")

    # If a path is provided, compose the full file name with it
    if path is not None:
        file_name = os.path.join(path, file_name)

    # Saving model
    with open(file_name, "wb") as fp:
        pk.dump(model, fp)
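
A round-trip sketch; since these helpers are thin wrappers around pickle, any picklable object stands in for a fitted model here:

import tempfile
from sysidentpy.utils.save_load import save_model, load_model

model = {"terms": ["y(k-1)", "x1(k-2)"], "theta": [0.2, 0.9]}  # stand-in object
with tempfile.TemporaryDirectory() as tmp:
    save_model(model=model, file_name="model.syspy", path=tmp)
    restored = load_model(file_name="model.syspy", path=tmp)
print(restored == model)  # True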