"""
Module containg the losses to be apply to the models as layers
The layer take the input from the model and acts on it producing a score function.
For instance, in the case of the chi2 (``LossInvcovmat``) the function takes only
the prediction of the model and, during instantiation, took the real data to compare with
and the covmat.
"""
import numpy as np
from n3fit.backends import MetaLayer
from n3fit.backends import operations as op
class LossInvcovmat(MetaLayer):
"""
Loss function such that:
L = \\sum_{ij} (yt - yp)_{i} invcovmat_{ij} (yt - yp)_{j}
Takes as argument the inverse of the covmat and the target data.
It also takes an optional argument to mask part of the predictions
Both the inverse covmat and the mask (if any) are stored as layer weights
and can be updated at any points either directly or by using the
``update_mask`` and ``add_covmat`` methods.
Example
-------
>>> import numpy as np
>>> from n3fit.layers import losses
>>> C = np.random.rand(5,5)
>>> data = np.random.rand(1, 1, 5)
>>> pred = np.random.rand(1, 1, 5)
>>> invC = np.linalg.inv( C @ C.T)
>>> loss_f = losses.LossInvcovmat(invC, data)
>>> loss_f(pred).shape == 1
True
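
    A sketch of masking out part of the data (the mask values here are
    hypothetical; any boolean sequence with one entry per datapoint works):

    >>> mask = [True, True, True, False, False]
    >>> masked_loss = losses.LossInvcovmat(invC, data, mask=mask)
    >>> masked_loss(pred).shape == (1,)
    True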
"""

    def __init__(self, invcovmat, y_true, mask=None, covmat=None, **kwargs):
        self._invcovmat = op.numpy_to_tensor(invcovmat)
        self._covmat = covmat
        self._y_true = op.numpy_to_tensor(y_true)
        self._ndata = y_true.shape[-1]
        if mask is None or all(mask):
            # A mask that keeps every point is equivalent to no mask at all
            self._mask = None
        else:
            mask = np.array(mask, dtype=np.float32).reshape((1, 1, -1))
            self._mask = op.numpy_to_tensor(mask)
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Transform the inverse covmat and the mask into
        weights of the layer"""
        init = MetaLayer.init_constant(self._invcovmat)
        self.kernel = self.builder_helper("invcovmat", self._invcovmat.shape, init, trainable=False)
        mask_shape = (1, 1, self._ndata)
        if self._mask is None:
            init_mask = MetaLayer.init_constant(np.ones(mask_shape))
        else:
            init_mask = MetaLayer.init_constant(self._mask)
        self.mask = self.builder_helper("mask", mask_shape, init_mask, trainable=False)

    def add_covmat(self, covmat):
        """Add a piece to the inverse covmat weights.
        Note, however, that the ``_covmat`` attribute of the layer will
        still refer to the original data covmat.
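
        Example
        -------
        A minimal sketch, assuming the layer was created with ``covmat``
        set to the ``C @ C.T`` matrix from the class example and already
        built by a first call (``extra`` is a hypothetical extra covmat):

        >>> extra = 0.1 * np.eye(5)
        >>> loss_f = losses.LossInvcovmat(invC, data, covmat=C @ C.T)
        >>> _ = loss_f(pred)  # build the layer so the weights exist
        >>> loss_f.add_covmat(extra)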
"""
new_covmat = np.linalg.inv(self._covmat + covmat)
self.kernel.assign(new_covmat)

    def update_mask(self, new_mask):
        """Update the mask, one value per datapoint:
        1.0 keeps the point and 0.0 removes it from the loss"""
        self.mask.assign(new_mask)

    def call(self, y_pred, **kwargs):
        obs_diff_raw = self._y_true - y_pred
        # TODO: most of the time this is a y * I multiplication and can be skipped;
        # benchmark how much time (if any) is lost here in actual fits for the benefit of faster kfolds
        obs_diff = op.op_multiply([obs_diff_raw, self.mask])
        # The experimental loss doesn't depend on replicas, so it doesn't have a replica axis and
        # must be treated separately
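        # Kernel shapes: (ndata, ndata) for the experimental loss,
        # (n_replicas, ndata, ndata) when every replica carries its own covmat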
        experimental_loss = len(self.kernel.shape) == 2
        one_replica = obs_diff.shape[1] == 1
        if one_replica:  # einsum is not well suited for CPU, so use tensordot if single replica
            kernel = self.kernel if experimental_loss else self.kernel[0]
            right_dot = op.tensor_product(kernel, obs_diff[0, 0, :], axes=1)
            loss = op.tensor_product(obs_diff[0, :, :], right_dot, axes=1)
        else:
            einstr = "bri, ij, brj -> r" if experimental_loss else "bri, rij, brj -> r"
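            # index convention: b = batch, r = replica, i/j = datapoint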
            loss = op.einsum(einstr, obs_diff, self.kernel, obs_diff)
        return loss

class LossLagrange(MetaLayer):
    """
    Abstract loss function to apply Lagrange multipliers to a model:

        L = \\lambda * f(y)

    The form of f(y) is given by modifying the ``apply_loss`` method.
    It is possible to modify how the multiplication by the lambda factor is
    implemented by overriding the ``apply_multiplier`` method; by default the
    multiplier is applied to the predictions before the loss, i.e., L = f(\\lambda * y).
    The (non-trainable) weight containing the multiplier is named ``lagMult``.
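
    Example
    -------
    A minimal sketch of a concrete subclass (``LossQuadratic`` is a
    hypothetical name, not part of n3fit):

    >>> import numpy as np
    >>> from n3fit.layers.losses import LossLagrange
    >>> from n3fit.backends import operations as op
    >>> class LossQuadratic(LossLagrange):
    ...     def apply_loss(self, y):
    ...         return op.sum(y * y, axis=[0, -1])
    >>> loss_f = LossQuadratic(c=10.0)
    >>> loss_f(np.ones((1, 1, 3))).shape == (1,)
    True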
"""

    def __init__(self, c=1.0, **kwargs):
        self._initial_multiplier = c
        super().__init__(**kwargs)

    def build(self, input_shape):
        multiplier = MetaLayer.init_constant(self._initial_multiplier)
        self.kernel = self.builder_helper("lagMult", (1,), multiplier, trainable=False)
        super().build(input_shape)

    def apply_multiplier(self, y):
        """Multiply the predictions by the Lagrange multiplier."""
        return self.kernel * y

    def apply_loss(self, y):
        """Identity by default; subclasses override this with f(y)."""
        return y

    def call(self, y_pred, **kwargs):
        y = self.apply_multiplier(y_pred)
        return self.apply_loss(y)

class LossPositivity(LossLagrange):
    """
    Returns L = \\sum elu(-\\lambda*y_pred)

    The positivity loss is computed by inverting the sign of the
    datapoints and then applying the elu function, defined as:

        f(x) = x if x > 0
        f(x) = alpha * (e^{x} - 1) if x < 0

    This is done to avoid a big discontinuity in the derivative at 0 when
    the Lagrange multiplier is very big.
    In practice this function can produce results in the range (-alpha, inf).

    Note that the multiplier is applied before the elu: for a large
    multiplier, e^{-\\lambda*y} underflows to zero and every positive
    prediction contributes exactly -alpha, which is why the example below
    returns exactly -5*alpha.

    Example
    -------
    >>> import numpy as np
    >>> from n3fit.layers import losses
    >>> pred = np.random.rand(1, 1, 5)
    >>> alpha = 1e-7
    >>> c = 1e8
    >>> loss_f = losses.LossPositivity(c=c, alpha=alpha)
    >>> loss_f(pred) == -5*alpha
    True
    >>> loss_f(-pred) > c
    True
    """

    def __init__(self, alpha=1e-7, **kwargs):
        self.alpha = alpha
        super().__init__(**kwargs)

    def apply_loss(self, y_pred):
        loss = op.elu(-y_pred, alpha=self.alpha)
        # Sum over the batch and the datapoints, keeping the replica axis
        return op.sum(loss, axis=[0, -1])

class LossIntegrability(LossLagrange):
    """
    Returns L = \\sum (\\lambda*y_pred)^2

    Example
    -------
    >>> import numpy as np
    >>> from n3fit.layers import losses
    >>> pred = np.random.rand(1, 1, 5)
    >>> loss_f = losses.LossIntegrability(c=1e2)
    >>> loss_f(pred) > 0
    True
    """

    def apply_loss(self, y_pred):
        y = y_pred * y_pred
        # Sum over the batch and the datapoints, keeping the replica axis
        return op.sum(y, axis=[0, -1])