Source code for validphys.calcutils

"""
calcutils.py

Low level utilities to calculate χ² and such. These are used to implement the
higher level functions in results.py
"""

import logging
from typing import Callable

import numpy as np
import pandas as pd
import scipy.linalg as la

log = logging.getLogger(__name__)


[docs]def calc_chi2(sqrtcov, diffs):
    r"""Elementary function to compute the chi², given a Cholesky decomposed
    lower triangular part and a vector of differences.

    Parameters
    ----------
    sqrtcov : matrix
        A lower tringular matrix corresponding to the lower part of
        the Cholesky decomposition of the covariance matrix.
    diffs : array
        A vector of differences (e.g. between data and theory).
        The first dimenssion must match the shape of `sqrtcov`.
        The computation will be broadcast over the other dimensions.

    Returns
    -------
    chi2 : array
        The result of the χ² for each vector of differences.
        Will have the same shape as ``diffs.shape[1:]``.

    Notes
    -----
    This function computes the χ² more efficiently and accurately than
    following the direct definition of inverting the covariance matrix,
    :math:`\chi^2 = d\Sigma^{-1}d`,
    by solving the triangular linear system instead.

    Examples
    --------

    >>> from validphys.calcutils import calc_chi2
    >>> import numpy as np
    >>> import scipy.linalg as la
    >>> np.random.seed(0)
    >>> diffs = np.random.rand(10)
    >>> s = np.random.rand(10,10)
    >>> cov = s@s.T
    >>> calc_chi2(la.cholesky(cov, lower=True), diffs)
    44.64401691354948
    >>> diffs@la.inv(cov)@diffs
    44.64401691354948

    """
    # handle empty data
    if not diffs.size:
        return np.full(diffs.shape[1:], np.nan)
    # Note la.cho_solve doesn't really improve things here
    # NOTE: Do not enable check_finite. The upper triangular part is not
    # guaranteed to make any sense.
    vec = la.solve_triangular(sqrtcov, diffs, lower=True, check_finite=False)
    # This sums up the result for the chi² for any input shape.
    # Sum the squares over the first dimension and leave the others alone
    return np.einsum('i...,i...->...', vec, vec)


[docs]def all_chi2(results):
    """Return the chi² for all elements in the result, regardless of the Stats class
    Note that the interpretation of the result will depend on the PDF error type"""
    data_result, th_result = results
    diffs = th_result.rawdata - data_result.central_value[:, np.newaxis]
    return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs)


[docs]def central_chi2(results):
    """Calculate the chi² from the central value of the theory prediction to
    the data"""
    data_result, th_result = results
    central_diff = th_result.central_value - data_result.central_value
    return calc_chi2(data_result.sqrtcovmat, central_diff)


[docs]def all_chi2_theory(results, totcov):
    """Like all_chi2 but here the chi² are calculated using a covariance matrix
    that is the sum of the experimental covmat and the theory covmat."""
    data_result, th_result = results
    diffs = th_result.rawdata - data_result.central_value[:, np.newaxis]
    total_covmat = np.array(totcov)
    return calc_chi2(sqrtcov=la.cholesky(total_covmat, lower=True), diffs=diffs)


[docs]def central_chi2_theory(results, totcov):
    """Like central_chi2 but here the chi² is calculated using a covariance matrix
    that is the sum of the experimental covmat and the theory covmat."""
    data_result, th_result = results
    central_diff = th_result.central_value - data_result.central_value
    total_covmat = np.array(totcov)
    return calc_chi2(la.cholesky(total_covmat, lower=True), central_diff)


[docs]def calc_phi(sqrtcov, diffs):
    """Low level function which calculates phi given a Cholesky decomposed
    lower triangular part and a vector of differences. Primarily used when phi
    is to be calculated independently from chi2.

    The vector of differences `diffs` is expected to have N_bins on the first
    axis
    """
    diffs = np.array(diffs)
    return np.sqrt(
        (np.mean(calc_chi2(sqrtcov, diffs), axis=0) - calc_chi2(sqrtcov, diffs.mean(axis=1)))
        / diffs.shape[0]
    )


[docs]def bootstrap_values(
    data, nresamples, *, boot_seed: int = None, apply_func: Callable = None, args=None
):
    """General bootstrap sample

    `data` is the data which is to be sampled, replicas is assumed to
    be on the final axis e.g N_bins*N_replicas

    `boot_seed` can be specified if the user wishes to be able to
    take exact same bootstrap samples multiple times, as default it is
    set as None, in which case a random seed is used.

    If just `data` and `nresamples` is provided, then `bootstrap_values`
    creates N resamples of the data, where each resample is a Monte Carlo
    selection of the data across replicas. The mean of each resample is
    returned

    Alternatively, the user can specify a function to be sampled `apply_func`
    plus any additional arguments required by that function.
    `bootstrap_values` then returns `apply_func(bootstrap_data, *args)`
    where `bootstrap_data.shape = (data.shape, nresamples)`. It is
    critical that `apply_func` can handle data input in this format.
    """
    data = np.atleast_2d(data)
    N_reps = data.shape[-1]
    bootstrap_data = data[
        ..., np.random.RandomState(boot_seed).randint(N_reps, size=(N_reps, nresamples))
    ]
    if apply_func is None:
        return np.mean(bootstrap_data, axis=-2)
    else:
        return apply_func(bootstrap_data, *args)


[docs]def get_df_block(matrix: pd.DataFrame, key: str, level):
    """Given a pandas dataframe whose index and column keys match, and data represents a symmetric
    matrix returns a diagonal block of this matrix corresponding to `matrix`[key`, key`] as a numpy
    array

    addtitionally, the user can specify the `level` of the key for which the cross section is being
    taken, by default it is set to 1 which corresponds to the dataset level of a theory covariance
    matrix
    """
    block = matrix.xs(key, level=level, axis=0).xs(key, level=level, axis=1).values
    return block


[docs]def regularize_covmat(covmat: np.array, norm_threshold=4):
    """Given a covariance matrix, performs a regularization which is equivalent
    to performing `regularize_l2` on the sqrt of `covmat`: the l2 norm of
    the inverse of the correlation matrix calculated from `covmat` is set to be
    less than or equal to `norm_threshold`. If the input covmat already fulfills
    this criterion it is returned.

    Parameters
    ----------
    covmat : array
        a covariance matrix which is to be regularized.
    norm_threshold : float
        The acceptable l2 norm of the sqrt correlation matrix, by default
        set to 4.

    Returns
    -------
    new_covmat : array
        A new covariance matrix which has been regularized according to
        prescription above.

    """
    # square up threshold since we have cov not sqrtcov
    sqr_threshold = norm_threshold**2
    d = np.sqrt(np.diag(covmat))[:, np.newaxis]
    corr = covmat / d / d.T
    # eigh gives eigenvals in ascending order
    e_val, e_vec = la.eigh(corr)
    # if eigenvalues are close to zero, can be negative
    if e_val[0] < 0:
        log.warning(
            "Negative eigenvalue encountered in correlation matrix: %s. "
            "Assuming eigenvalue should be zero and is negative due to numerical "
            "precision.",
            e_val[0],
        )
    if e_val[0] > 1 / sqr_threshold:
        return covmat
    new_e_val = np.clip(e_val, a_min=1 / sqr_threshold, a_max=None)
    return ((e_vec * new_e_val) @ e_vec.T) * d * d.T


[docs]def regularize_l2(sqrtcov, norm_threshold=4):
    r"""Return a regularized version of `sqrtcov`.

    Given `sqrtcov` an (N, nsys) matrix, such that it's
    gram matrix is the covariance matrix (`covmat = sqrtcov@sqrtcov.T`), first
    decompose it like ``sqrtcov = D@A``, where `D` is a positive diagonal matrix
    of standard deviations and `A` is the "square root" of the correlation
    matrix, ``corrmat = A@A.T``. Then produce a new version of `A` which removes
    the unstable behaviour and assemble a new square root covariance matrix,
    which is returned.

    The stability condition is controlled by `norm_threshold`. It is

    .. math::

        \left\Vert A^+ \right\Vert_{L2}
        \leq \frac{1}{\text{norm_threshold}}

    A+ is the pseudoinverse of A, `norm_threshold` roughly corresponds to the
    sqrt of the maximimum relative uncertainty in any systematic.

    Parameters
    ----------

    sqrtcov : 2d array
        An (N, nsys) matrix specifying the uncertainties.
    norm_threshold : float
        The tolerance for the regularization.

    Returns
    -------

    newsqrtcov : 2d array
        A regularized version of `sqrtcov`.
    """

    d = np.sqrt(np.sum(sqrtcov**2, axis=1))[:, np.newaxis]
    sqrtcorr = sqrtcov / d
    u, s, vt = la.svd(sqrtcorr, full_matrices=False)
    if 1 / s[-1] <= norm_threshold:
        return sqrtcov
    snew = np.clip(s, a_min=1 / norm_threshold, a_max=None)
    return u * (snew * d) @ vt