Source code for n3fit.backends.keras_backend.MetaModel

"""
    MetaModel class

    Extension of the backend Model class containing some wrappers in order to absorb other
    backend-dependent calls.
"""

from pathlib import Path
import re

from keras import optimizers as Kopt
from keras.models import Model
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.utils import tf_utils  # pylint: disable=no-name-in-module

import n3fit.backends.keras_backend.operations as op

# We need a function to transform tensors to numpy/python primitives
# which is not part of the official TF interface and can change with the version
if hasattr(tf_utils, "to_numpy_or_python_type"):
    _to_numpy_or_python_type = tf_utils.to_numpy_or_python_type
elif hasattr(tf_utils, "sync_to_numpy_or_python_type"):  # from TF 2.5
    _to_numpy_or_python_type = tf_utils.sync_to_numpy_or_python_type
else:  # in case of disaster
    _to_numpy_or_python_type = lambda ret: {k: i.numpy() for k, i in ret.items()}


# Define in this dictionary new optimizers as well as the arguments they accept
# (with default values if need be)
optimizers = {
    "RMSprop": (Kopt.RMSprop, {"learning_rate": 0.01}),
    "Adam": (Kopt.Adam, {"learning_rate": 0.01}),
    "Adagrad": (Kopt.Adagrad, {}),
    "Adadelta": (Kopt.Adadelta, {"learning_rate": 1.0}),
    "Adamax": (Kopt.Adamax, {}),
    "Nadam": (Kopt.Nadam, {"learning_rate": 0.001}),
    "Amsgrad": (Kopt.Adam, {"learning_rate": 0.01, "amsgrad": True}),
    "SGD": (Kopt.SGD, {"learning_rate": 0.01, "momentum": 0.0, "nesterov": False}),
}
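# Illustrative sketch (an assumption, not part of the original module): a new
# optimizer becomes available to ``MetaModel.compile`` simply by adding an
# entry here, before the clipnorm loop below, e.g.
#
#   optimizers["Ftrl"] = (Kopt.Ftrl, {"learning_rate": 0.01})
#
# after which ``compile(optimizer_name="Ftrl")`` would pick it up and the
# common ``clipnorm`` default would also be applied to it.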

NN_PREFIX = "NN"
NN_LAYER_ALL_REPLICAS = "all_NNs"
PREPROCESSING_LAYER_ALL_REPLICAS = "preprocessing_factor"

# Some arguments need to be set for all optimizers
for k, v in optimizers.items():
    v[1]["clipnorm"] = 1.0


def _default_loss(y_true, y_pred):  # pylint: disable=unused-argument
    """Default loss to be used when the model is compiled with loss = Null
    (for instance if the prediction of the model is already the loss"""
    return op.sum(y_pred)


[docs]class MetaModel(Model):
    """
    The model wraps keras.Model and adds some custom behaviour. Most notably it
    allows supplying constant values for input arguments, which are used when
    training and making predictions with the model (note that constants need to
    be explicitly registered as inputs, see
    https://github.com/keras-team/keras/issues/11912).
    These inputs can be passed in the ``input_values`` parameter, or gathered from
    the ``tensor_content`` attribute of the ``input_tensors``, which is set
    automatically when using the ``numpy_to_input`` function from
    :py:mod:`n3fit.backends.keras_backend.operations`.

    Parameters
    ----------
        input_tensors: dict[Any, tensorflow.keras.layers.Input]
            Input layer
        output_tensors: tensorflow.keras.layers.Layer
            Output layer
        input_values: dict[Any, array_like]
            Constant values for the input layer, to be supplied when making
            predictions with the model.
        **kwargs:
            keyword arguments to pass directly to Model
    """

    accepted_optimizers = optimizers

    def __init__(self, input_tensors, output_tensors, scaler=None, input_values=None, **kwargs):
        self.has_dataset = False
        self.required_slots = set()

        if input_values is None:
            input_values = {}

        if not isinstance(input_tensors, dict):
            raise TypeError("Expecting input_tensors to be a dict")
        if not isinstance(input_values, dict):
            raise TypeError("Expecting input_values to be a dict or None")

        x_in = {}
        # Go over the inputs. If we can deduce a constant value, either because
        # it is set in input_values or because it has a tensor_content, we
        # store it. Otherwise we mark the input as required when making
        # predictions.
        for k, v in input_tensors.items():
            if k in input_values:
                x_in[k] = input_values[k]
            elif hasattr(v, "tensor_content"):
                x_in[k] = op.numpy_to_tensor(v.tensor_content)
            else:
                self.required_slots.add(k)

        super().__init__(input_tensors, output_tensors, **kwargs)

        self.x_in = x_in
        self.input_tensors = input_tensors
        self.single_replica_generator = None
        self.target_tensors = None
        self.compute_losses_function = None
        self._scaler = scaler

    @tf.autograph.experimental.do_not_convert
    def _parse_input(self, extra_input=None):
        """Returns the input data the model was compiled with.
        Introduces the extra_input in the places assigned to the placeholders.

        If the model was generated with a scaler, the input will be scaled accordingly
        """
        if extra_input is None:
            if self.required_slots:
                raise ValueError(f"The following inputs must be provided: {self.required_slots}")
            return self.x_in

        if not isinstance(extra_input, dict):
            raise TypeError("extra_input should be a dict or None")

        if diff := (self.required_slots - extra_input.keys()):
            raise ValueError(f"The following inputs must be provided {diff}")

        if diff := (extra_input.keys() - (self.x_in.keys() | self.required_slots)):
            raise ValueError(f"The following inputs are unknown {diff}")

        if self._scaler is not None:
            extra_input = {k: self._scaler(i) for k, i in extra_input.items()}

        return {**self.x_in, **extra_input}
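    # Illustrative usage sketch (an assumption, not part of the module): the
    # layer and input names below are invented for the example. An input
    # created with ``op.numpy_to_input`` carries its ``tensor_content``, so it
    # is stored as a constant and need not be supplied again at predict time:
    #
    #   import numpy as np
    #   from keras.layers import Dense
    #
    #   xgrid = op.numpy_to_input(np.linspace(1e-3, 1.0, 10).reshape(1, 10, 1))
    #   model = MetaModel({"pdf_input": xgrid}, Dense(1)(xgrid))
    #   model.predict()  # the stored xgrid constant is used automatically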
[docs]    def perform_fit(self, x=None, y=None, epochs=1, **kwargs):
        """
        Performs forward (and backwards) propagation for the model for a given number of epochs.

        The output of this function consists of a dictionary that maps the names of the metrics
        of the model (the loss functions) to the partial losses.

        If the model was compiled with input and output data, they will not be passed through.
        In this case by default the number of ``epochs`` will be set to 1

        ex:
            {'loss': [100], 'dataset_a_loss1': [67], 'dataset_2_loss': [33]}

        Returns
        -------
            loss_dict: dict
                a dictionary with all partial losses of the model
        """
        x_params = self._parse_input(x)
        if y is None:
            y = self.target_tensors
        history = super().fit(x=x_params, y=y, epochs=epochs, **kwargs)
        loss_dict = history.history
        return loss_dict
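    # Illustrative sketch (assumption): after ``compile`` the fit history comes
    # back as a dictionary of per-epoch losses, e.g.
    #
    #   loss_dict = model.perform_fit(epochs=100, verbose=0)
    #   final_total_loss = loss_dict["loss"][-1]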
[docs]    def predict(self, x=None, **kwargs):
        """Call super().predict with the right input arguments"""
        x = self._parse_input(x)
        result = super().predict(x=x, **kwargs)
        return result
[docs]    def compute_losses(self):
        """
        This function is equivalent to the model ``evaluate(x,y)`` method of most TensorFlow models
        which return a dictionary of losses per output layer.
        The losses reported in the ``evaluate`` method for n3fit are, however, summed over replicas.
        Instead the loss we are interested in is usually the output of the model (i.e., predict).
        This function then generates a dict of partial losses of the model separated per replica,
        i.e., the output for experiment ``LHC_exp`` will be an array of Nrep elements.

        Returns
        -------
            dict
                a dictionary with all partial losses of the model
        """
        if self.compute_losses_function is None:
            # If it is the first time we are passing through, compile the function and save it
            out_names = [f"{i}_loss" for i in self.output_names]
            out_names.insert(0, "loss")

            # Compile an evaluation function
            @tf.function
            def losses_fun():
                predictions = self(self._parse_input(None))
                # If we only have one dataset the output changes
                if len(out_names) == 2:
                    predictions = [predictions]
                total_loss = tf.reduce_sum(predictions, axis=0)
                ret = [total_loss] + predictions
                return dict(zip(out_names, ret))

            self.compute_losses_function = losses_fun

        ret = self.compute_losses_function()

        # The output of this function is to be used by python (and numpy)
        # so we need to convert the tensors
        return _to_numpy_or_python_type(ret)
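    # Illustrative sketch (assumption): the keys follow the output names of the
    # model plus a total "loss" entry, each holding one value per replica, e.g.
    #
    #   losses = model.compute_losses()
    #   total_per_replica = losses["loss"]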
[docs]    def compile(
        self,
        optimizer_name="RMSprop",
        learning_rate=None,
        loss=None,
        target_output=None,
        clipnorm=None,
        **kwargs,
    ):
        """
        Compile the model given an optimizer and a list of loss functions.
        The optimizer must be one of those implemented in the ``accepted_optimizers``
        attribute of this class.

        Options:
            - A learning rate and a list of target outputs can be defined.
              These will be passed down to the optimizer.
            - A ``target_output`` can be defined. If done in this way
              (for instance because we know the target data will be the same for the whole fit)
              the data will be compiled together with the model and it won't be necessary
              to input it again when calling the ``perform_fit`` or ``compute_losses`` methods.

        Parameters
        ----------
            optimizer_name: str
                string defining the optimizer to be used
            learning_rate: float
                learning rate of the optimizer
                (if accepted as an argument, if not it will be ignored)
            loss: list
                list of loss functions to be passed to the model
            target_output: list
                list of outputs to compare the results to during fitting/evaluation
                if given further calls to fit/evaluate must be done with y = None.
        """
        try:
            opt_tuple = optimizers[optimizer_name]
        except KeyError as e:
            raise NotImplementedError(
                f"[MetaModel.select_initializer] optimizer not implemented: {optimizer_name}"
            ) from e

        if loss is None:
            loss = _default_loss

        opt_function = opt_tuple[0]
        opt_args = opt_tuple[1]

        user_selected_args = {"learning_rate": learning_rate, "clipnorm": clipnorm}

        # Override defaults with user provided values
        for key, value in user_selected_args.items():
            if key in opt_args.keys() and value is not None:
                opt_args[key] = value

        # Instantiate the optimizer
        opt = opt_function(**opt_args)

        # If given target output is None, target_output is unnecessary, save just a zero per output
        if target_output is None:
            self.target_tensors = [op.numpy_to_tensor(np.zeros((1, 1))) for i in self.output_shape]
        else:
            if not isinstance(target_output, list):
                target_output = [target_output]
            self.target_tensors = target_output

        super().compile(optimizer=opt, loss=loss)
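    # Illustrative sketch (assumption; ``exp_data`` is a placeholder for a list
    # of target arrays): compiling the targets together with the model means
    # later calls can use ``y=None``:
    #
    #   model.compile(optimizer_name="Nadam", learning_rate=2.6e-3, target_output=exp_data)
    #   model.perform_fit(epochs=1000)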
[docs]    def set_masks_to(self, names, val=0.0):
        """Set all mask values to the selected value

        Masks in MetaModel should be named {name}_mask
        Masks are layers with one single weight (shape=(1,)) that multiplies the input

        Parameters
        ----------
            names: list
                list of masks to look for
            val: float
                selected value of the mask
        """
        mask_val = [val]
        for name in names:
            mask_name = f"{name}_mask"
            mask_w = self.get_layer(mask_name).weights[0]
            mask_w.assign(mask_val)
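    # Illustrative sketch (assumption; the mask name is invented): a layer
    # registered in the model as "positivity_mask" can be switched off with
    #
    #   model.set_masks_to(["positivity"], val=0.0)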
[docs]    def reset_layer_weights_to(self, layer_names, reference_vals):
        """Set weights for the given layers to the given reference values

        The ``reference_vals`` list must be of the same size as ``layer_names``
        and it must consist of numpy arrays that perfectly align to the reference layer weights.
        In the special case of 1-weight layers it admits a scalar as input.

        Parameters
        ----------
            layer_names: list
                list of names of the layers to update weights
            reference_vals: list(float) or list(arrays)
                list of scalars or arrays to assign to each layer
        """
        for layer_name, values in zip(layer_names, reference_vals):
            if np.isscalar(values):
                values = np.array([[values]])
            layer = self.get_layer(layer_name)
            all_w = layer.weights
            for w, v in zip(all_w, values):
                w.assign(v)
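    # Illustrative sketch (assumption; the layer name is invented): for a
    # 1-weight layer named "alpha_mask" a scalar is accepted directly,
    #
    #   model.reset_layer_weights_to(["alpha_mask"], [1.0])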
[docs]    def apply_as_layer(self, x):
        """Apply the model as a layer"""
        all_input = {**self.input_tensors, **x}
        return all_input, super().__call__(all_input)
[docs]    def get_layer_re(self, regex):
        """Get all layers matching the given regular expression"""
        check = lambda x: re.match(regex, x.name)
        return list(filter(check, self.layers))
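    # Illustrative sketch (assumption about the layer naming): pick out all
    # layers whose names start with the NN prefix defined above,
    #
    #   nn_layers = model.get_layer_re(rf"{NN_PREFIX}_\d+")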
[docs]    def get_replica_weights(self, i_replica):
        """
        Get the weights of replica i_replica.

        This assumes that the only weights are in the layer types defined as the constants
            NN_LAYER_ALL_REPLICAS & PREPROCESSING_LAYER_ALL_REPLICAS

        Parameters
        ----------
            i_replica: int

        Returns
        -------
            dict
                dictionary with the weights of the replica
        """
        weights = {}
        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
            layer = self.get_layer(layer_type)
            weights[layer_type] = get_layer_replica_weights(layer, i_replica)
        return weights
[docs]    def set_replica_weights(self, weights, i_replica=0):
        """
        Set the weights of replica i_replica.

        This assumes that the only weights are in the layer types defined as the constants
            NN_LAYER_ALL_REPLICAS & PREPROCESSING_LAYER_ALL_REPLICAS

        Parameters
        ----------
            weights: dict
                dictionary with the weights of the replica
            i_replica: int
                the replica number to set, defaulting to 0
        """
        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
            layer = self.get_layer(layer_type)
            set_layer_replica_weights(layer=layer, weights=weights[layer_type], i_replica=i_replica)
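    # Illustrative sketch (assumption): copy the weights of replica 3 onto
    # replica 0 of the same multi-replica model,
    #
    #   model.set_replica_weights(model.get_replica_weights(3), i_replica=0)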
[docs]    def split_replicas(self):
        """
        Split the single multi-replica model into a list of separate single replica models,
        maintaining the current state of the weights.

        Returns
        -------
            list
                list of single replica models
        """
        if self.single_replica_generator is None:
            raise ValueError("Trying to generate single replica models with no generator set.")

        replicas = []
        for i_replica in range(self.num_replicas):
            replica = self.single_replica_generator()
            replica.set_replica_weights(self.get_replica_weights(i_replica))
            replicas.append(replica)

        return replicas
    @property
    def num_replicas(self):
        return self.output.shape[1]
[docs]    def load_identical_replicas(self, model_file):
        """
        From a single replica model, load the same weights into all replicas.
        """
        model_file = Path(model_file)

        single_replica = self.single_replica_generator()
        single_replica.load_weights(model_file)
        weights = single_replica.get_replica_weights(0)

        for i_replica in range(self.num_replicas):
            self.set_replica_weights(weights, i_replica)
[docs]    def save_weights(self, file):
        """
        Compatibility function for:
            - tf < 2.16, keras < 3: the ``save_format`` argument is needed for h5
            - tf >= 2.16, keras >= 3: the save format is deduced from the file extension

        In both cases, the weights are finally copied to the ``file`` path.
        """
        try:
            # Keras 2, tf < 2.16
            super().save_weights(file, save_format="h5")
        except TypeError:
            # Newer versions of keras (>=3) drop the ``save_format`` argument
            # and instead take the format from the extension of the file.
            # Also, from Keras 3.2 weights files must be suffixed as .weights.h5
            # for both saving and loading!
            if file.name.endswith(".weights.h5"):
                new_file = file
            else:
                new_file = file.with_suffix(".weights.h5")
            super().save_weights(new_file)
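    # Illustrative sketch (assumption; the path is invented): the call is the
    # same for every supported keras version,
    #
    #   model.save_weights(Path("output/replica_1/weights.h5"))
    #
    # although with keras >= 3 the file actually written may carry the
    # ``.weights.h5`` suffix instead.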
[docs]def is_stacked_single_replicas(layer):
    """
    Check if the layer consists of stacked single replicas (only happens for NN layers),
    to determine how to extract single replica weights.

    Parameters
    ----------
        layer: MetaLayer
            the layer to check

    Returns
    -------
        bool
            True if the layer consists of stacked single replicas
    """
    if not isinstance(layer, MetaModel):
        return False
    return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers]
[docs]def get_layer_replica_weights(layer, i_replica: int):
    """
    Get the weights for the given single replica ``i_replica``,
    from a ``layer`` that contains the weights of all the replicas.

    Note that the layer could be a complete NN with many separated sub_layers,
    each of which containing weights for all replicas together.
    This function separates the per-replica weights and returns the list of weights as if the
    input ``layer`` were made of _only_ replica ``i_replica``.

    Parameters
    ----------
        layer: MetaLayer
            the layer to get the weights from
        i_replica: int
            the replica number

    Returns
    -------
        weights: list
            list of weights for the replica
    """
    if is_stacked_single_replicas(layer):
        weights_ref = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights
        weights = [tf.Variable(w, name=w.name) for w in weights_ref]
    else:
        weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights]

    return weights
[docs]def set_layer_replica_weights(layer, weights, i_replica: int):
    """
    Set the weights for the given single replica ``i_replica``.
    When the input ``layer`` contains weights for many replicas, ensures that
    only those corresponding to replica ``i_replica`` are updated.

    Parameters
    ----------
        layer: MetaLayer
            the layer to set the weights for
        weights: list
            list of weights for the replica
        i_replica: int
            the replica number
    """
    if is_stacked_single_replicas(layer):
        layer.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights)
        return

    full_weights = [w.numpy() for w in layer.weights]
    for w_old, w_new in zip(full_weights, weights):
        w_old[i_replica : i_replica + 1] = w_new

    layer.set_weights(full_weights)
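# Illustrative sketch (assumption about the weight layout, with the replica
# axis first): the in-place slice assignment above is plain numpy, e.g.
#
#   import numpy as np
#   full = np.zeros((4, 2, 3))   # stacked weights for 4 replicas
#   new = np.ones((1, 2, 3))     # weights for a single replica
#   full[2:3] = new              # only the slice of replica 2 is overwritten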