Source code for validphys.calcutils

"""
calcutils.py

Low level utilities to calculate χ² and such. These are used to implement the
higher level functions in results.py
"""
import logging
from typing import Callable

import numpy as np
import pandas as pd
import scipy.linalg as la

log = logging.getLogger(__name__)


def calc_chi2(sqrtcov, diffs):
    r"""Elementary function to compute the chi², given a Cholesky decomposed
    lower triangular part and a vector of differences.

    Parameters
    ----------
    sqrtcov : matrix
        A lower triangular matrix corresponding to the lower part of
        the Cholesky decomposition of the covariance matrix.
    diffs : array
        A vector of differences (e.g. between data and theory).
        The first dimension must match the shape of `sqrtcov`.
        The computation will be broadcast over the other dimensions.
        Any array-like (e.g. a nested list) is accepted.

    Returns
    -------
    chi2 : array
        The result of the χ² for each vector of differences.
        Will have the same shape as ``diffs.shape[1:]``. If `diffs` is
        empty, an array of that shape filled with NaN is returned.

    Notes
    -----
    This function computes the χ² more efficiently and accurately than
    following the direct definition of inverting the covariance matrix,
    :math:`\chi^2 = d\Sigma^{-1}d`,
    by solving the triangular linear system instead.

    Examples
    --------

    >>> from validphys.calcutils import calc_chi2
    >>> import numpy as np
    >>> import scipy.linalg as la
    >>> np.random.seed(0)
    >>> diffs = np.random.rand(10)
    >>> s = np.random.rand(10,10)
    >>> cov = s@s.T
    >>> calc_chi2(la.cholesky(cov, lower=True), diffs)
    44.64401691354948
    >>> diffs@la.inv(cov)@diffs
    44.64401691354948

    """
    # Coerce once so that the ``size``/``shape`` checks below also work for
    # plain sequences, which solve_triangular would accept anyway.
    diffs = np.asarray(diffs)
    # handle empty data
    if not diffs.size:
        return np.full(diffs.shape[1:], np.nan)
    # Note la.cho_solve doesn't really improve things here
    # NOTE: Do not enable check_finite. The upper triangular part is not
    # guaranteed to make any sense.
    vec = la.solve_triangular(sqrtcov, diffs, lower=True, check_finite=False)
    # This sums up the result for the chi² for any input shape.
    # Sum the squares over the first dimension and leave the others alone
    return np.einsum('i...,i...->...', vec, vec)
def all_chi2(results):
    """Compute the chi² of every element of the theory result against the data.

    The value is computed for all members held in ``rawdata`` of the theory
    result, regardless of the Stats class; how the output should be
    interpreted therefore depends on the PDF error type.

    Parameters
    ----------
    results : tuple
        A ``(data_result, th_result)`` pair. The data result must expose
        ``central_value`` and ``sqrtcovmat``; the theory result must expose
        ``rawdata``.

    Returns
    -------
    chi2 : array
        Whatever ``calc_chi2`` returns for the differences between
        ``rawdata`` and the data central values.
    """
    data, theory = results
    predictions = theory.rawdata
    # Turn the data central values into a column so that they broadcast
    # against the trailing axis of the theory predictions.
    data_column = data.central_value[:, np.newaxis]
    return calc_chi2(sqrtcov=data.sqrtcovmat, diffs=predictions - data_column)
def central_chi2(results):
    """Compute the chi² between the central theory prediction and the data.

    Parameters
    ----------
    results : tuple
        A ``(data_result, th_result)`` pair. Both must expose
        ``central_value``; the data result must also expose ``sqrtcovmat``.

    Returns
    -------
    chi2
        The output of ``calc_chi2`` for the difference of central values.
    """
    data, theory = results
    residual = theory.central_value - data.central_value
    return calc_chi2(data.sqrtcovmat, residual)
def all_chi2_theory(results, totcov):
    """Variant of ``all_chi2`` that uses a caller-supplied total covariance.

    The chi² values are evaluated with `totcov`, a covariance matrix which is
    the sum of the experimental covmat and the theory covmat, instead of the
    covariance stored in the data result.

    Parameters
    ----------
    results : tuple
        A ``(data_result, th_result)`` pair exposing ``central_value`` and
        ``rawdata`` respectively.
    totcov : array-like
        The total covariance matrix. It is converted to a numpy array and
        Cholesky decomposed (lower triangular) before use.

    Returns
    -------
    chi2 : array
        The output of ``calc_chi2`` for the differences between ``rawdata``
        and the data central values.
    """
    data, theory = results
    predictions = theory.rawdata
    # Column vector so the subtraction broadcasts over the trailing axis.
    data_column = data.central_value[:, np.newaxis]
    residuals = predictions - data_column
    lower_factor = la.cholesky(np.array(totcov), lower=True)
    return calc_chi2(sqrtcov=lower_factor, diffs=residuals)
def central_chi2_theory(results, totcov):
    """Variant of ``central_chi2`` that uses a caller-supplied total covariance.

    The chi² is evaluated with `totcov`, a covariance matrix which is the sum
    of the experimental covmat and the theory covmat.

    Parameters
    ----------
    results : tuple
        A ``(data_result, th_result)`` pair, both exposing ``central_value``.
    totcov : array-like
        The total covariance matrix. It is converted to a numpy array and
        Cholesky decomposed (lower triangular) before use.

    Returns
    -------
    chi2
        The output of ``calc_chi2`` for the difference of central values.
    """
    data, theory = results
    residual = theory.central_value - data.central_value
    lower_factor = la.cholesky(np.array(totcov), lower=True)
    return calc_chi2(lower_factor, residual)
def calc_phi(sqrtcov, diffs):
    """Low level computation of phi from a Cholesky factor and differences.

    Mainly useful when phi has to be obtained independently of the chi².

    Parameters
    ----------
    sqrtcov : matrix
        Lower triangular Cholesky factor, forwarded to ``calc_chi2``.
    diffs : array-like
        Differences with N_bins on the first axis. The second axis is
        averaged over (presumably the replica axis -- confirm with callers).

    Returns
    -------
    phi
        ``sqrt((<chi2(diffs)> - chi2(<diffs>)) / N_bins)``, where the first
        average is over axis 0 of the chi² array and the second over axis 1
        of `diffs`.
    """
    diffs = np.array(diffs)
    nbins = diffs.shape[0]
    mean_of_chi2 = np.mean(calc_chi2(sqrtcov, diffs), axis=0)
    chi2_of_mean = calc_chi2(sqrtcov, diffs.mean(axis=1))
    return np.sqrt((mean_of_chi2 - chi2_of_mean) / nbins)
def bootstrap_values(
    data, nresamples, *, boot_seed: int = None, apply_func: Callable = None, args=None
):
    """General bootstrap sample

    `data` is the data which is to be sampled, replicas is assumed to
    be on the final axis e.g N_bins*N_replicas

    `boot_seed` can be specified if the user wishes to be able to
    take exact same bootstrap samples multiple times, as default it is
    set as None, in which case a random seed is used.

    If just `data` and `nresamples` is provided, then `bootstrap_values`
    creates N resamples of the data, where each resample is a Monte Carlo
    selection of the data across replicas. The mean of each resample is
    returned, so the output has shape ``(*data.shape[:-1], nresamples)``
    (after `data` has been promoted to at least two dimensions).

    Alternatively, the user can specify a function to be sampled `apply_func`
    plus any additional positional arguments required by that function in
    `args` (an iterable; omitted or ``None`` means no extra arguments).
    `bootstrap_values` then returns `apply_func(bootstrap_data, *args)`
    where `bootstrap_data.shape = (*data.shape, nresamples)`. It is
    critical that `apply_func` can handle data input in this format.
    """
    data = np.atleast_2d(data)
    N_reps = data.shape[-1]
    # Draw, for each resample, N_reps replica indices with replacement.
    rng = np.random.RandomState(boot_seed)
    indices = rng.randint(N_reps, size=(N_reps, nresamples))
    # Fancy indexing on the last axis appends the (N_reps, nresamples) axes.
    bootstrap_data = data[..., indices]
    if apply_func is None:
        # Average over the resampled-replica axis, keeping the resample axis.
        return np.mean(bootstrap_data, axis=-2)
    # ``args`` defaults to None, which cannot be star-unpacked.
    extra_args = () if args is None else args
    return apply_func(bootstrap_data, *extra_args)
def get_df_block(matrix: pd.DataFrame, key: str, level=1):
    """Given a pandas dataframe whose index and column keys match, and data
    represents a symmetric matrix, returns a diagonal block of this matrix
    corresponding to `matrix[key, key]` as a numpy array.

    Additionally, the user can specify the `level` of the key for which the
    cross section is being taken; by default it is set to 1, which corresponds
    to the dataset level of a theory covariance matrix.

    Parameters
    ----------
    matrix : pd.DataFrame
        Dataframe with a MultiIndex on both rows and columns.
    key : str
        Label to select on both axes.
    level : int or str, default 1
        The MultiIndex level on which `key` is looked up, passed on to
        ``DataFrame.xs``.

    Returns
    -------
    block : np.ndarray
        The values of the selected rows and columns.
    """
    # Select the rows first, then the matching columns of that slice.
    rows = matrix.xs(key, level=level, axis=0)
    block = rows.xs(key, level=level, axis=1).values
    return block
def regularize_covmat(covmat: np.ndarray, norm_threshold=4):
    """Given a covariance matrix, performs a regularization which is equivalent
    to performing `regularize_l2` on the sqrt of `covmat`: the eigenvalues of
    the correlation matrix calculated from `covmat` are bounded from below by
    ``1 / norm_threshold**2``, so that the l2 norm of the inverse of the
    *square root* of the correlation matrix is less than or equal to
    `norm_threshold`. If the input covmat already fulfills this criterion it
    is returned unchanged.

    Parameters
    ----------
    covmat : array
        a covariance matrix which is to be regularized.
    norm_threshold : float
        The acceptable l2 norm of the inverse of the sqrt correlation matrix,
        by default set to 4.

    Returns
    -------
    new_covmat : array
        A new covariance matrix which has been regularized according to
        prescription above.

    """
    # square up threshold since we have cov not sqrtcov
    sqr_threshold = norm_threshold**2
    # Column of standard deviations, used to go to and from the correlation
    # matrix.
    d = np.sqrt(np.diag(covmat))[:, np.newaxis]
    corr = covmat / d / d.T
    # eigh gives eigenvals in ascending order
    e_val, e_vec = la.eigh(corr)
    # if eigenvalues are close to zero, can be negative
    if e_val[0] < 0:
        log.warning(
            "Negative eigenvalue encountered in correlation matrix: %s. "
            "Assuming eigenvalue should be zero and is negative due to numerical "
            "precision.",
            e_val[0],
        )
    # The smallest eigenvalue is already above the floor: nothing to do.
    if e_val[0] > 1 / sqr_threshold:
        return covmat
    new_e_val = np.clip(e_val, a_min=1 / sqr_threshold, a_max=None)
    # Rebuild the correlation matrix from the clipped spectrum and scale it
    # back with the original standard deviations.
    return ((e_vec * new_e_val) @ e_vec.T) * d * d.T
def regularize_l2(sqrtcov, norm_threshold=4):
    r"""Return a regularized version of `sqrtcov`.

    `sqrtcov` is an (N, nsys) matrix whose Gram matrix is the covariance
    matrix (``covmat = sqrtcov@sqrtcov.T``). It is first factorized as
    ``sqrtcov = D@A``, with `D` the positive diagonal matrix of standard
    deviations and `A` the "square root" of the correlation matrix
    (``corrmat = A@A.T``). The singular values of `A` are then bounded from
    below, which removes the unstable behaviour, and the square root
    covariance matrix is reassembled and returned.

    The stability condition is controlled by `norm_threshold`. It is

    .. math::

        \left\Vert A^+ \right\Vert_{L2} \leq \text{norm_threshold}

    where A+ is the pseudoinverse of A; equivalently, every singular value of
    `A` is at least ``1 / norm_threshold``. If the input already satisfies
    the condition it is returned unchanged.

    Parameters
    ----------
    sqrtcov : 2d array
        An (N, nsys) matrix specifying the uncertainties.
    norm_threshold : float
        The tolerance for the regularization.

    Returns
    -------
    newsqrtcov : 2d array
        A regularized version of `sqrtcov`.
    """
    # Row norms of sqrtcov are the standard deviations; keep them as a column.
    stddev = np.sqrt(np.sum(sqrtcov**2, axis=1))[:, np.newaxis]
    left, sing_vals, right_t = la.svd(sqrtcov / stddev, full_matrices=False)
    # Singular values come in descending order, so the last one is the
    # smallest and its inverse is the norm of the pseudoinverse.
    if 1 / sing_vals[-1] <= norm_threshold:
        return sqrtcov
    clipped = np.clip(sing_vals, a_min=1 / norm_threshold, a_max=None)
    # Scale column j of `left` by the clipped singular value j and row i by
    # the standard deviation i, then project back with the right vectors.
    scaled_left = left * (clipped * stddev)
    return scaled_left @ right_t