Source code for validphys.closuretest.multiclosure_pseudodata

"""
multiclosure_pseudodata

Actions which load fit pseudodata and compute estimators related to overfitting.
Estimators here can only be calculated on data used in the fit.

"""

import numpy as np
import pandas as pd

from reportengine import collect
from reportengine.table import table
from validphys.calcutils import calc_chi2
from validphys.closuretest.closure_checks import check_use_fitcommondata
from validphys.core import cut_mask

# NOTE: for some reason the fit doesn't get properly resolved if you try to
# collect data over fits. As a workaround the single ``dataset`` is collected
# over ``fits`` here, and the per-dataset results are then collected over
# ``data`` further down in this module.
fits_dataset = collect("dataset", ("fits",))


@check_use_fitcommondata
def fits_dataset_cvs(fits_dataset):
    """Load the level one central values of one dataset for every fit.

    Internal helper. The central values are read through
    ``commondata.metadata.load_data_central()``, which avoids the stringent
    metadata checks of the newer python commondata parser, and the cuts of
    each dataset are applied before converting to a numpy array.

    Parameters
    ----------
    fits_dataset : iterable
        The same dataset as loaded for each of the fits.

    Returns
    -------
    list
        One array of cut central values per entry of ``fits_dataset``, in
        the same order.
    """
    return [
        per_fit_ds.commondata.metadata.load_data_central()
        .iloc[cut_mask(per_fit_ds.cuts)]
        .to_numpy()
        for per_fit_ds in fits_dataset
    ]
# ``fits_dataset_cvs`` collected over ``data``: the outer level runs over
# datasets and each entry holds the per-fit central values of that dataset.
data_fits_cv = collect(fits_dataset_cvs, ("data",))
def expected_data_delta_chi2(data_fits_cv, internal_multiclosure_data_loader):
    """Compute the mean over fits of the unnormalised delta chi2 for ``data``.

    For every fit, delta chi2 is the chi2 of the fit central prediction to
    that fit's pseudodata minus the chi2 of the underlying law prediction to
    the same pseudodata.

    Parameters
    ----------
    data_fits_cv : list
        Per-dataset lists of per-fit pseudodata central values.
    internal_multiclosure_data_loader : tuple
        Four-element tuple; the first, second and fourth elements are used
        as the per-fit theory predictions, the underlying law prediction and
        the square root of the covariance matrix respectively.

    Returns
    -------
    tuple
        ``(ndata, delta_chi2)`` where ``ndata`` is the length of the
        underlying law central value and ``delta_chi2`` is the mean of the
        unnormalised delta chi2 across fits.
    """
    closures_th, law_th, _, sqrt_covmat = internal_multiclosure_data_loader
    underlying_cv = law_th.central_value

    def _fit_delta_chi2(fit_index, fit_theory):
        # Transpose: pick this fit's values from every dataset and join them
        # into a single vector covering all datasets.
        pseudodata = np.concatenate([per_dataset[fit_index] for per_dataset in data_fits_cv])
        fit_cv = fit_theory.central_value
        chi2_law = calc_chi2(sqrt_covmat, underlying_cv - pseudodata)
        chi2_fit = calc_chi2(sqrt_covmat, fit_cv - pseudodata)
        return chi2_fit - chi2_law

    per_fit_delta = [
        _fit_delta_chi2(fit_index, fit_theory)
        for fit_index, fit_theory in enumerate(closures_th)
    ]
    return len(underlying_cv), np.mean(per_fit_delta)
# ``expected_data_delta_chi2`` evaluated once per experiment grouping.
exps_expected_delta_chi2 = collect( "expected_data_delta_chi2", ("group_dataset_inputs_by_experiment",) )
def total_expected_data_delta_chi2(exps_expected_delta_chi2):
    """Sum :py:func:`expected_data_delta_chi2` across experiments.

    Parameters
    ----------
    exps_expected_delta_chi2 : iterable
        ``(ndata, delta_chi2)`` pairs, one per experiment.

    Returns
    -------
    tuple
        ``(ndata, delta_chi2)``: the total number of datapoints and the total
        unnormalised delta chi2. An empty input gives ``(0, 0.0)``.
    """
    # Materialise once so that the input can be traversed twice even if it
    # is a one-shot iterator.
    results = list(exps_expected_delta_chi2)
    # The two components are summed separately: summing the pairs as a single
    # 2D array would promote the integer point counts to float.
    ndata = sum(n_points for n_points, _ in results)
    delta_chi2 = np.sum([exp_delta for _, exp_delta in results])
    return ndata, delta_chi2
# ``expected_data_delta_chi2`` evaluated once per metadata group.
groups_expected_delta_chi2 = collect( "expected_data_delta_chi2", ("group_dataset_inputs_by_metadata",) )
@table
def expected_delta_chi2_table(
    groups_expected_delta_chi2, group_dataset_inputs_by_metadata, total_expected_data_delta_chi2
):
    """Tabulate the expected delta chi2 per group, with a final total row.

    Each row holds the number of datapoints and the delta chi2 divided by
    that number. The last row, labelled ``Total``, is built from
    ``total_expected_data_delta_chi2``.

    Parameters
    ----------
    groups_expected_delta_chi2 : iterable
        ``(ndata, delta_chi2)`` pairs, one per group.
    group_dataset_inputs_by_metadata : iterable
        Group specifications, each providing a ``"group_name"`` entry; paired
        positionally with ``groups_expected_delta_chi2``.
    total_expected_data_delta_chi2 : tuple
        ``(ndata, delta_chi2)`` for all data.

    Returns
    -------
    pandas.DataFrame
        Table indexed by group name with the point count and the normalised
        delta chi2 as columns.
    """
    rows = [
        {
            "group": group_spec["group_name"],
            "ndata": n_points,
            "delta_chi2": group_delta / n_points,
        }
        for group_spec, (n_points, group_delta) in zip(
            group_dataset_inputs_by_metadata, groups_expected_delta_chi2
        )
    ]

    total_points, total_delta = total_expected_data_delta_chi2
    rows.append(
        {"group": "Total", "ndata": total_points, "delta_chi2": total_delta / total_points}
    )

    result = pd.DataFrame.from_records(rows, index="group")
    result.columns = ["ndata", r"$\Delta_{\chi^2}$"]
    return result