"""
deltachi2.py
Plots and data processing that can be used in a delta chi2 analysis
"""
import logging
import warnings
import numpy as np
import scipy as sp
from reportengine.checks import CheckError, make_argcheck
from reportengine.figure import figure, figuregen
from validphys import plotutils
from validphys.checks import check_pdf_normalize_to, check_pdfs_noband, check_scale
from validphys.core import PDF
from validphys.pdfplots import BandPDFPlotter, PDFPlotter
log = logging.getLogger(__name__)
[docs]@make_argcheck
def check_pdf_is_symmhessian(pdf, **kwargs):
"""Check ``pdf`` has error type of ``symmhessian``"""
etype = pdf.error_type
if etype != "symmhessian":
raise CheckError(f"Error: type of PDF {pdf} must be 'symmhessian' and not {etype}")
[docs]@check_pdf_is_symmhessian
def delta_chi2_hessian(pdf, total_chi2_data):
"""
Return delta_chi2 (computed as in plot_delta_chi2_hessian) relative to
each eigenvector of the Hessian set.
"""
delta_chi2 = (
np.ravel(total_chi2_data.replica_result.error_members()) - total_chi2_data.central_result
)
return delta_chi2
[docs]@figure
def plot_kullback_leibler(delta_chi2_hessian):
"""
Determines the Kullback–Leibler divergence by comparing the expectation value of Delta chi2 to
the cumulative distribution function of chi-square distribution with one degree of freedom
(see: https://en.wikipedia.org/wiki/Chi-square_distribution).
The Kullback-Leibler divergence provides a measure of the difference between two distribution
functions, here we compare the chi-squared distribution and the cumulative distribution of the
expectation value of Delta chi2.
"""
delta_chi2 = delta_chi2_hessian
bins_nnpdf = np.arange(0, 7.5, 0.1)
# find the midpoint of each bin, has length len(bins_nnpdf) - 1
bin_central_nnpdf = (bins_nnpdf[1:] + bins_nnpdf[:-1]) / 2
x = np.linspace(0, 7.4, 200)
fig, ax = plotutils.subplots()
vals_nnpdf, _, _ = ax.hist(
delta_chi2,
bins=bins_nnpdf,
density=True,
cumulative=True,
label=r"cumulative $\Delta\chi^2$",
)
# compute Kullback-Leibler (null values set to 1e-8)
vals_nnpdf[vals_nnpdf == 0] = 1e-8
kl_nnpdf = sp.stats.entropy(sp.stats.chi2.cdf(bin_central_nnpdf, 1), qk=vals_nnpdf)
ax.plot(x, sp.stats.chi2.cdf(x, 1), label=r"$\chi^2$ CDF")
ax.set_title(f"KL divergence: {kl_nnpdf:.4f}")
ax.set_xlabel(r"$<\Delta\chi^2>$")
ax.legend()
return fig
[docs]@figure
def plot_delta_chi2_hessian_eigenv(delta_chi2_hessian, pdf):
"""
Plot of the chi2 difference between chi2 of each eigenvector of a symmHessian set
and the central value for all experiments in a fit.
As a function of every eigenvector in a first plot, and as a distribution in a second plot.
"""
delta_chi2 = delta_chi2_hessian
x = np.arange(1, len(delta_chi2) + 1)
fig, ax = plotutils.subplots()
ax.bar(x, delta_chi2, label=pdf.label)
ax.set_xlabel("# Hessian PDF")
ax.set_ylabel(r"$\Delta\chi^2$")
ax.set_title(r"$\Delta\chi^2$ each eigenvector")
ax.legend()
return fig
[docs]@figure
def plot_delta_chi2_hessian_distribution(delta_chi2_hessian, pdf, total_chi2_data):
"""
Plot of the chi2 difference between chi2 of each eigenvector of a symmHessian set
and the central value for all experiments in a fit.
As a function of every eigenvector in a first plot, and as a distribution in a second plot.
"""
delta_chi2 = delta_chi2_hessian
fig, ax = plotutils.subplots()
bins = np.arange(np.floor(min(delta_chi2)), np.ceil(max(delta_chi2)) + 1)
ax.hist(
delta_chi2,
bins=bins,
label=f"{pdf.label} - $\chi^2_{0}$={total_chi2_data.central_result:.0f}",
)
ax.set_xlabel(r"$\Delta\chi^2$")
ax.set_title(r"$\Delta\chi^2$ distribution")
return fig
[docs]def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
"""
Generates xplotting_grids correspodning to positive and negative delta chi2s.
"""
positive_eigenvalue_mask = delta_chi2_hessian >= 0
# The masks do not include replica 0, add it in both grids
pos_mask = np.append(True, positive_eigenvalue_mask)
neg_mask = np.append(True, ~positive_eigenvalue_mask)
pos_grid = xplotting_grid.grid_values.data[pos_mask]
neg_grid = xplotting_grid.grid_values.data[neg_mask]
# Wrap everything back into the original stats class
stats_class = xplotting_grid.grid_values.__class__
pos_xplotting_grid = xplotting_grid.copy_grid(stats_class(pos_grid))
neg_xplotting_grid = xplotting_grid.copy_grid(stats_class(neg_grid))
return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]
[docs]@figuregen
@check_pdf_normalize_to
@check_pdfs_noband
@check_scale("xscale", allow_none=True)
def plot_pos_neg_pdfs(
pdf,
pos_neg_xplotting_grids,
xscale: (str, type(None)) = None,
normalize_to: (int, str, type(None)) = None,
ymin=None,
ymax=None,
pdfs_noband: (list, type(None)) = None,
):
"""
Plot the the uncertainty of the original hessian pdfs, as well as that of the positive and
negative subset.
"""
original_pdf = pdf.name
# create fake PDF objects so we can reuse BandPDFPlotter
pos_pdf = PDF(original_pdf)
pos_pdf.label = f"{original_pdf}_pos"
neg_pdf = PDF(original_pdf)
neg_pdf.label = f"{original_pdf}_neg"
pdfs = [pdf, pos_pdf, neg_pdf]
yield from BandPDFPlotter(
pdfs, pos_neg_xplotting_grids, xscale, normalize_to, ymin, ymax, pdfs_noband=pdfs_noband
)
[docs]class PDFEpsilonPlotter(PDFPlotter):
"""Subclassing PDFPlotter in order to plot epsilon (measure of gaussanity)
for multiple PDFs, yielding a separate figure for each flavour
"""
[docs] def setup_flavour(self, flstate):
flstate.labels = []
flstate.handles = []
[docs] def get_ylabel(self, parton_name):
return r'$\epsilon(x)$'
[docs] def draw(self, pdf, grid, flstate):
"""Obtains the gridvalues of epsilon (measure of Gaussianity)"""
ax = flstate.ax
flindex = flstate.flindex
labels = flstate.labels
handles = flstate.handles
# Create a copy of the `Stats` instance of the grid
# with only the flavours we are interested in
gv = grid.grid_values.data
stats = grid(grid_values=gv[:, flindex, :]).grid_values
# Ignore spurious normalization warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
error68down, error68up = stats.errorbar68() # 68% error bands
errorstd = stats.std_error()
# color cycle iterable
color = ax._get_lines.get_next_color()
xgrid = grid.xgrid
# the division by 2 is equivalent to considering the complete 1-sigma band (2 * error_std)
error68 = (error68up - error68down) / 2.0
epsilon = abs(1 - errorstd / error68)
(handle,) = ax.plot(xgrid, epsilon, linestyle="-", color=color)
handles.append(handle)
labels.append(pdf.label)
return [5 * epsilon]
[docs] def legend(self, flstate):
return flstate.ax.legend(
flstate.handles,
flstate.labels,
handler_map={plotutils.HandlerSpec: plotutils.ComposedHandler()},
)
[docs]@make_argcheck
def check_pdfs_are_montecarlo(pdfs, **kwargs):
"""Checks that the action is applied only to a pdf consisiting of MC replicas."""
for pdf in pdfs:
etype = pdf.error_type
if etype != "replicas":
raise CheckError(f"Error: type of PDF {pdf} must be 'replicas' and not '{etype}'")
[docs]@figuregen
@check_pdfs_are_montecarlo
@check_scale("xscale", allow_none=True)
def plot_epsilon(
pdfs, xplotting_grids, xscale: (str, type(None)) = None, ymin=None, ymax=None, eps=None
):
"""Plot the discrepancy (epsilon) of the 1-sigma and 68% bands at each grid value
for all pdfs for a given Q. See https://arxiv.org/abs/1505.06736 eq. (11)
xscale is read from pdf plotting_grid scale, which is 'log' by default.
eps defines the value at which plot a simple hline
"""
yield from PDFEpsilonPlotter(
pdfs, xplotting_grids, xscale, normalize_to=None, ymin=ymin, ymax=ymax
)