from pathlib import Path
import numpy as np
import openmdao.api as om
from aviary.utils.csv_data_file import read_data_file
from aviary.utils.functions import get_path
from aviary.utils.named_values import NamedValues, get_items, get_keys
def build_data_interpolator(
interpolator_data=None,
interpolator_outputs=None,
num_nodes=1,
method='slinear',
extrapolate=True,
structured=None,
connect_training_data=False,
):
"""
    Builder for OpenMDAO metamodel components. Data can be provided via an Aviary data
    file, given directly as an argument, or supplied as training data through OpenMDAO
    connections. Data is converted to a structured grid format if possible; otherwise a
    semistructured grid is assumed.

Parameters
----------
    interpolator_data : str, Path, or NamedValues
        Path to the Aviary csv file containing all data required for interpolation, or the
        data directly given as a NamedValues object.
    interpolator_outputs : list or dict, optional
        If connect_training_data is True, a dictionary mapping the names of dependent
        variables (keys) to their units (values). This dictionary describes the names and
        units of training data that will be provided via OpenMDAO connections during model
        execution. If connect_training_data is False, a list of the names of dependent
        variables in interpolator_data; names that do not match a variable in
        interpolator_data are ignored. Required if interpolator_data is a NamedValues
        object.
    num_nodes : int
        Number of points that will be simultaneously interpolated during model execution.
    method : str, optional
        Interpolation method for the metamodel. See the OpenMDAO documentation for valid
        options.
    extrapolate : bool, optional
        Flag that sets if the metamodel should allow extrapolation.
    structured : bool, optional
        Flag to set if the interpolation data is a structured grid. If True, the structured
        metamodel component is used; if False, the semistructured metamodel is used. If
        None, the builder chooses based on the provided data structure.
    connect_training_data : bool, optional
        Flag that sets if dependent data for interpolation will be passed via OpenMDAO
        connections. If True, any provided values for dependent variables will be ignored.
Returns
-------
    interp_comp : om.MetaModelStructuredComp or om.MetaModelSemiStructuredComp
        OpenMDAO metamodel component using the provided data and flags.
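
    Examples
    --------
    A minimal sketch of the training-data mode, using hypothetical names, units, and
    values (the real 'thrust' values are supplied later via an OpenMDAO connection):

    >>> data = NamedValues()
    >>> data.set_val('mach', np.array([0.2, 0.4, 0.6]), 'unitless')
    >>> data.set_val('thrust', np.zeros(3), 'lbf')  # placeholder; connected at run time
    >>> interp_comp = build_data_interpolator(
    ...     interpolator_data=data,
    ...     interpolator_outputs={'thrust': 'lbf'},
    ...     connect_training_data=True,
    ... )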
"""
    # Argument checking
# if interpolator data is a filepath, get data from file
if isinstance(interpolator_data, str):
interpolator_data = get_path(interpolator_data)
if isinstance(interpolator_data, Path):
interpolator_data, inputs, outputs = read_data_file(interpolator_data)
else:
inputs = []
outputs = []
# Determine if independent and dependent variables are accounted for
# Combine interpolator_outputs & outputs found in data file
if interpolator_outputs is not None:
if isinstance(interpolator_outputs, dict):
addtl_outputs = list(interpolator_outputs.keys())
else:
addtl_outputs = interpolator_outputs
outputs = list(set(outputs + addtl_outputs))
all_vars = get_keys(interpolator_data)
# Scenario 1: Only outputs provided in data file
if len(inputs) == 0 and len(outputs) != 0:
for key in all_vars:
if key not in outputs:
inputs.append(key)
# Scenario 2: Only inputs provided in data file
elif len(inputs) != 0 and len(outputs) == 0:
for key in all_vars:
if key not in inputs:
outputs.append(key)
# Raise UserWarning if Scenario 1 or 2 fails
if len(outputs) == 0:
raise UserWarning(
'Insufficient information on inputs and outputs for interpolation was provided'
)
# Scenario 3: Both inputs and outputs provided
# Check that nothing in interpolator_outputs conflicts with inputs - read_data_file() already
# checks for "double labeling" of inputs/outputs in data file
    # interpolator_outputs may be None if inputs/outputs are fully labeled in the data file
    if interpolator_outputs is not None:
        for key in interpolator_outputs:
            if key in inputs:
                raise UserWarning(f'Variable <{key}> was specified as both an input and an output.')
for key in all_vars:
if key in inputs:
continue
if key in outputs:
continue
        raise UserWarning(
            f'Variable <{key}> was not specified as either an input or an output.'
        )
# Pre-format data: Independent variables placed before dependent variables - position
# of these variables relative to others of their type is preserved
# All data converted to numpy arrays
indep_vars = NamedValues()
dep_vars = NamedValues()
for key, (val, units) in get_items(interpolator_data):
if not isinstance(val, np.ndarray):
val = np.array(val)
        # check against the merged outputs list; interpolator_outputs may be None here
        if key in outputs:
dep_vars.set_val(key, val, units)
else:
indep_vars.set_val(key, val, units)
# update interpolator_data with correctly ordered indep/dep vars in numpy arrays
interpolator_data.update(indep_vars)
for key, (val, units) in get_items(dep_vars):
interpolator_data.set_val(key, val, units)
# TODO investigate creating structured grid from semistructured grid via extrapolation
# is data already in structured format?
# assume data is structured until proven otherwise
data_pre_structured = True
shape = []
# check inputs, should be vector of unique values only
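    # e.g. an input column [0.0, 0.5, 1.0] is unique and ascending, so it can serve as a
    # structured-grid axis, while [0.0, 0.5, 0.0] cannot (values are illustrative only)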
for key, (val, units) in get_items(interpolator_data):
if len(val.shape) == 1:
if key not in outputs:
if np.array_equal(np.unique(val), val):
# if vector is only unique values, could be structured!
# Store shape and keep going
                    shape.append(len(val))
else:
# Data is not structured. Stop looping through inputs
data_pre_structured = False
break
# check outputs, should be array matching shape of input vector lengths
# if we already know data needs formatting, don't bother checking outputs
if data_pre_structured:
for key in outputs:
(val, units) = interpolator_data.get_item(key)
if np.shape(val) != tuple(shape):
if len(np.shape(val)) > 1:
# we assume user was *trying* to set up a structured grid
# if output is multi-dimensional array. If output is 1d it could
# be a structured grid with one input, or a semistructured grid
raise ValueError(
f'shape of output <{key}>, {np.shape(val)}, does '
f'not match expected shape {tuple(shape)}'
)
                else:
                    # If the output is 1-D, we can't tell whether the data is structured.
                    # There is no harm in sorting and "reformatting" 1-D data, so assume
                    # it needs to be converted
                    data_pre_structured = False
break
if structured is None and data_pre_structured:
# If the data is already structured, just use a structured grid - it's faster
# with no downsides
structured = True
elif structured is None:
# In case structured is still None, set it to False - we know data is unstructured
structured = False
if not connect_training_data:
        # Sort and format data, but only when not using training data - since we control
        # both the input and output data, they are guaranteed to still match after
        # reformatting
        # Sort into semistructured grid format; always sort unless data is already structured
if not data_pre_structured:
            # first check that all data are vectors of the same length
            model_length = None
            for key, (val, units) in get_items(interpolator_data):
                if model_length is None:
                    model_length = len(val)
                elif len(val) != model_length:
                    raise IndexError('Lengths of data provided for interpolation do not match.')
# get data into column array format
sorted_values = np.array(
[val for (key, (val, units)) in get_items(interpolator_data)]
).transpose()
# get all the independent values in format needed for sorting
independent_vals = np.array([val for (key, (val, units)) in get_items(indep_vars)])
            # Sort rows by the independent variables, in priority order of their appearance
sorted_values = sorted_values[np.lexsort(np.flip(independent_vals, 0))]
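            # np.lexsort uses its *last* row as the primary sort key, so flipping the rows
            # makes the first independent variable the primary key - e.g. with inputs
            # (mach, altitude), rows are ordered by mach first, then by altitude within
            # each mach value (hypothetical column names)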
# reset interpolator_data with sorted values
for idx, (var, (val, units)) in enumerate(get_items(interpolator_data)):
interpolator_data.set_val(var, sorted_values[:, idx], units)
# If user wants structured data, but provided data is not formatted correctly,
# convert it!
if structured and not data_pre_structured:
# Use assumptions for structured grid to format data
# Only need to reformat data when not using training data, user is responsible
# for formatting in that case
# Assumes independent variables are first columns
(length, var_count) = np.shape(sorted_values)
indep_var_count = np.shape(independent_vals)[0]
structured_data = []
# only need unique independent variables
unique_data = []
for i in range(indep_var_count):
unique_data.append(np.unique(sorted_values[:, i]))
structured_data.append(unique_data[i])
shape = tuple([np.size(unique_data[i]) for i in range(indep_var_count)])
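            # e.g. two inputs with 3 and 2 unique values give shape (3, 2), so each 6-row
            # output column below is reshaped into a (3, 2) grid (illustrative sizes)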
# output data needs to be in nd array format
for i in range(indep_var_count, var_count):
structured_data.append(np.reshape(sorted_values[:, i], shape))
# reset interpolator_data with structured grid formatted values
for idx, (var, (val, units)) in enumerate(get_items(interpolator_data)):
interpolator_data.set_val(var, structured_data[idx], units)
if connect_training_data and structured and not data_pre_structured:
        # User has asked for structured data but has not provided it. Because training
        # data is connected at run time, we can't do any processing on the data (even
        # ensuring sorted order), since that might misalign inputs with future
        # connections we can't control here. Just convert inputs to structured grid format
for key in get_keys(indep_vars):
(val, units) = interpolator_data.get_item(key)
# take unique values only, put back into interpolator_data
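            # e.g. a repeated input column [0.2, 0.2, 0.4, 0.4, 0.6, 0.6] collapses to the
            # grid axis [0.2, 0.4, 0.6] (hypothetical values)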
val = np.unique(val)
interpolator_data.set_val(key, val, units)
# create interpolation component
if structured:
interp_comp = om.MetaModelStructuredComp(
method=method,
extrapolate=extrapolate,
vec_size=num_nodes,
training_data_gradients=connect_training_data,
)
else:
interp_comp = om.MetaModelSemiStructuredComp(
method=method,
extrapolate=extrapolate,
vec_size=num_nodes,
training_data_gradients=connect_training_data,
)
# add interpolator inputs
for key in get_keys(indep_vars):
values, units = interpolator_data.get_item(key)
interp_comp.add_input(key, training_data=values, units=units)
# add interpolator outputs
if connect_training_data:
for key in interpolator_outputs:
units = interpolator_outputs[key]
interp_comp.add_output(key, units=units)
else:
for key in outputs:
values, units = interpolator_data.get_item(key)
interp_comp.add_output(key, training_data=values, units=units)
return interp_comp
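

if __name__ == '__main__':
    # A minimal usage sketch, not part of the library API. The mach/thrust names and
    # values below are hypothetical and for illustration only.
    data = NamedValues()
    data.set_val('mach', np.array([0.2, 0.4, 0.6]), 'unitless')
    data.set_val('thrust', np.array([1000.0, 900.0, 800.0]), 'lbf')
    interp = build_data_interpolator(
        interpolator_data=data, interpolator_outputs=['thrust']
    )

    # evaluate the interpolator once through an OpenMDAO problem
    prob = om.Problem()
    prob.model.add_subsystem('interp', interp, promotes=['*'])
    prob.setup()
    prob.set_val('mach', 0.3)
    prob.run_model()
    # 'slinear' interpolation between (0.2, 1000.0) and (0.4, 900.0) gives 950.0
    print(prob.get_val('thrust', units='lbf'))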