Source code for validphys.deltachi2

"""
deltachi2.py

Plots and data processing that can be used in a delta chi2 analysis
"""

import logging
import warnings

import numpy as np
import scipy as sp

from reportengine.checks import CheckError, make_argcheck
from reportengine.figure import figure, figuregen
from validphys import plotutils
from validphys.checks import check_pdf_normalize_to, check_pdfs_noband, check_scale
from validphys.core import PDF
from validphys.pdfplots import BandPDFPlotter, PDFPlotter

log = logging.getLogger(__name__)


[docs]@make_argcheck def check_pdf_is_symmhessian(pdf, **kwargs): """Check ``pdf`` has error type of ``symmhessian``""" etype = pdf.error_type if etype != "symmhessian": raise CheckError(f"Error: type of PDF {pdf} must be 'symmhessian' and not {etype}")
[docs]@check_pdf_is_symmhessian def delta_chi2_hessian(pdf, total_chi2_data): """ Return delta_chi2 (computed as in plot_delta_chi2_hessian) relative to each eigenvector of the Hessian set. """ delta_chi2 = ( np.ravel(total_chi2_data.replica_result.error_members()) - total_chi2_data.central_result ) return delta_chi2
[docs]@figure def plot_kullback_leibler(delta_chi2_hessian): """ Determines the Kullback–Leibler divergence by comparing the expectation value of Delta chi2 to the cumulative distribution function of chi-square distribution with one degree of freedom (see: https://en.wikipedia.org/wiki/Chi-square_distribution). The Kullback-Leibler divergence provides a measure of the difference between two distribution functions, here we compare the chi-squared distribution and the cumulative distribution of the expectation value of Delta chi2. """ delta_chi2 = delta_chi2_hessian bins_nnpdf = np.arange(0, 7.5, 0.1) # find the midpoint of each bin, has length len(bins_nnpdf) - 1 bin_central_nnpdf = (bins_nnpdf[1:] + bins_nnpdf[:-1]) / 2 x = np.linspace(0, 7.4, 200) fig, ax = plotutils.subplots() vals_nnpdf, _, _ = ax.hist( delta_chi2, bins=bins_nnpdf, density=True, cumulative=True, label=r"cumulative $\Delta\chi^2$", ) # compute Kullback-Leibler (null values set to 1e-8) vals_nnpdf[vals_nnpdf == 0] = 1e-8 kl_nnpdf = sp.stats.entropy(sp.stats.chi2.cdf(bin_central_nnpdf, 1), qk=vals_nnpdf) ax.plot(x, sp.stats.chi2.cdf(x, 1), label=r"$\chi^2$ CDF") ax.set_title(f"KL divergence: {kl_nnpdf:.4f}") ax.set_xlabel(r"$<\Delta\chi^2>$") ax.legend() return fig
[docs]@figure def plot_delta_chi2_hessian_eigenv(delta_chi2_hessian, pdf): """ Plot of the chi2 difference between chi2 of each eigenvector of a symmHessian set and the central value for all experiments in a fit. As a function of every eigenvector in a first plot, and as a distribution in a second plot. """ delta_chi2 = delta_chi2_hessian x = np.arange(1, len(delta_chi2) + 1) fig, ax = plotutils.subplots() ax.bar(x, delta_chi2, label=pdf.label) ax.set_xlabel("# Hessian PDF") ax.set_ylabel(r"$\Delta\chi^2$") ax.set_title(r"$\Delta\chi^2$ each eigenvector") ax.legend() return fig
[docs]@figure def plot_delta_chi2_hessian_distribution(delta_chi2_hessian, pdf, total_chi2_data): """ Plot of the chi2 difference between chi2 of each eigenvector of a symmHessian set and the central value for all experiments in a fit. As a function of every eigenvector in a first plot, and as a distribution in a second plot. """ delta_chi2 = delta_chi2_hessian fig, ax = plotutils.subplots() bins = np.arange(np.floor(min(delta_chi2)), np.ceil(max(delta_chi2)) + 1) ax.hist( delta_chi2, bins=bins, label=f"{pdf.label} - $\chi^2_{0}$={total_chi2_data.central_result:.0f}", ) ax.set_xlabel(r"$\Delta\chi^2$") ax.set_title(r"$\Delta\chi^2$ distribution") return fig
[docs]def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid): """ Generates xplotting_grids correspodning to positive and negative delta chi2s. """ positive_eigenvalue_mask = delta_chi2_hessian >= 0 # The masks do not include replica 0, add it in both grids pos_mask = np.append(True, positive_eigenvalue_mask) neg_mask = np.append(True, ~positive_eigenvalue_mask) pos_grid = xplotting_grid.grid_values.data[pos_mask] neg_grid = xplotting_grid.grid_values.data[neg_mask] # Wrap everything back into the original stats class stats_class = xplotting_grid.grid_values.__class__ pos_xplotting_grid = xplotting_grid.copy_grid(stats_class(pos_grid)) neg_xplotting_grid = xplotting_grid.copy_grid(stats_class(neg_grid)) return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]
[docs]@figuregen @check_pdf_normalize_to @check_pdfs_noband @check_scale("xscale", allow_none=True) def plot_pos_neg_pdfs( pdf, pos_neg_xplotting_grids, xscale: (str, type(None)) = None, normalize_to: (int, str, type(None)) = None, ymin=None, ymax=None, pdfs_noband: (list, type(None)) = None, ): """ Plot the the uncertainty of the original hessian pdfs, as well as that of the positive and negative subset. """ original_pdf = pdf.name # create fake PDF objects so we can reuse BandPDFPlotter pos_pdf = PDF(original_pdf) pos_pdf.label = f"{original_pdf}_pos" neg_pdf = PDF(original_pdf) neg_pdf.label = f"{original_pdf}_neg" pdfs = [pdf, pos_pdf, neg_pdf] yield from BandPDFPlotter( pdfs, pos_neg_xplotting_grids, xscale, normalize_to, ymin, ymax, pdfs_noband=pdfs_noband )
[docs]class PDFEpsilonPlotter(PDFPlotter): """Subclassing PDFPlotter in order to plot epsilon (measure of gaussanity) for multiple PDFs, yielding a separate figure for each flavour """
[docs] def setup_flavour(self, flstate): flstate.labels = [] flstate.handles = []
[docs] def get_ylabel(self, parton_name): return r'$\epsilon(x)$'
[docs] def draw(self, pdf, grid, flstate): """Obtains the gridvalues of epsilon (measure of Gaussianity)""" ax = flstate.ax flindex = flstate.flindex labels = flstate.labels handles = flstate.handles # Create a copy of the `Stats` instance of the grid # with only the flavours we are interested in gv = grid.grid_values.data stats = grid(grid_values=gv[:, flindex, :]).grid_values # Ignore spurious normalization warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) error68down, error68up = stats.errorbar68() # 68% error bands errorstd = stats.std_error() # color cycle iterable color = ax._get_lines.get_next_color() xgrid = grid.xgrid # the division by 2 is equivalent to considering the complete 1-sigma band (2 * error_std) error68 = (error68up - error68down) / 2.0 epsilon = abs(1 - errorstd / error68) (handle,) = ax.plot(xgrid, epsilon, linestyle="-", color=color) handles.append(handle) labels.append(pdf.label) return [5 * epsilon]
[docs] def legend(self, flstate): return flstate.ax.legend( flstate.handles, flstate.labels, handler_map={plotutils.HandlerSpec: plotutils.ComposedHandler()}, )
[docs]@make_argcheck def check_pdfs_are_montecarlo(pdfs, **kwargs): """Checks that the action is applied only to a pdf consisiting of MC replicas.""" for pdf in pdfs: etype = pdf.error_type if etype != "replicas": raise CheckError(f"Error: type of PDF {pdf} must be 'replicas' and not '{etype}'")
[docs]@figuregen @check_pdfs_are_montecarlo @check_scale("xscale", allow_none=True) def plot_epsilon( pdfs, xplotting_grids, xscale: (str, type(None)) = None, ymin=None, ymax=None, eps=None ): """Plot the discrepancy (epsilon) of the 1-sigma and 68% bands at each grid value for all pdfs for a given Q. See https://arxiv.org/abs/1505.06736 eq. (11) xscale is read from pdf plotting_grid scale, which is 'log' by default. eps defines the value at which plot a simple hline """ yield from PDFEpsilonPlotter( pdfs, xplotting_grids, xscale, normalize_to=None, ymin=ymin, ymax=ymax )