# Source code for validphys.closuretest.inconsistent_closuretest.inconsistent_ct

"""
This module contains the InconsistentCommonData class which is meant to have all the
methods needed in order to introduce an inconsistency within a Closure Test.
"""

import dataclasses

import pandas as pd

from nnpdf_data.coredata import CommonData
from validphys.utils import yaml_safe


@dataclasses.dataclass(eq=False)
class InconsistentCommonData(CommonData):
    """
    CommonData subclass used to introduce an inconsistency within a Closure Test.

    It inherits all of the methods of the ``coredata.CommonData`` class and
    turns ``systematic_errors`` into a read/write property, so that a rescaled
    table of systematics can be stored on the instance and picked up by every
    later read of ``systematic_errors``.
    """

    setname: str
    ndata: int
    commondataproc: str
    nkin: int
    nsys: int
    commondata_table: pd.DataFrame = dataclasses.field(repr=False)
    systype_table: pd.DataFrame = dataclasses.field(repr=False)
    systematics_table: pd.DataFrame = dataclasses.field(default=None, repr=False)
    # Table assigned through the ``systematic_errors`` setter. It stays ``None``
    # until something is assigned, in which case the parent class computation
    # is used instead. Not part of ``__init__``.
    _systematic_errors: pd.DataFrame = dataclasses.field(default=None, init=False)

    @property
    def systematic_errors(self):
        """
        Overrides the ``systematic_errors`` method of the CommonData class.

        Here it is a property (and not a method) so that a value can be
        assigned to it through the setter. If nothing has been assigned, the
        result of the parent class ``systematic_errors()`` is returned.
        """
        if self._systematic_errors is None:
            return super().systematic_errors()
        return self._systematic_errors

    @systematic_errors.setter
    def systematic_errors(self, value):
        # Store the override; subsequent reads of the property return it as is.
        self._systematic_errors = value

    def select_systype_table_indices(self, treatment_names, names_uncertainties):
        """
        Get the indices of the rows of ``systype_table`` whose treatment is in
        ``treatment_names`` and whose name is selected by ``names_uncertainties``.

        Parameters
        ----------
        treatment_names : list
            list of the names of the treatments that should be selected
            possible values are: MULT, ADD

        names_uncertainties : list
            list of the names of the uncertainties that should be selected
            possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL
            SPECIAL selects every systematic whose name is not listed in
            ``validphys.covmats.INTRA_DATASET_SYS_NAME``

        Returns
        -------
        pd.Index
            index of the selected rows of ``systype_table``

        Raises
        ------
        ValueError
            if ``names_uncertainties`` contains a name that is not one of the
            possible values listed above
        """
        allowed_names = ("CORR", "UNCORR", "THEORYCORR", "THEORYUNCORR", "SPECIAL")
        if any(name not in allowed_names for name in names_uncertainties):
            raise ValueError(
                "names_uncertainties should only contain either CORR, UNCORR, THEORYCORR, THEORYUNCORR or SPECIAL"
            )

        treatment_mask = self.systype_table["treatment"].isin(treatment_names)
        sys_names = self.systype_table["name"]

        if "SPECIAL" in names_uncertainties:
            # imported here to avoid a circular import error
            from validphys.covmats import INTRA_DATASET_SYS_NAME

            explicit_names = [name for name in names_uncertainties if name != "SPECIAL"]
            # "SPECIAL" keeps every name outside INTRA_DATASET_SYS_NAME; the
            # | operator extends the selection to the explicitly requested names
            name_mask = ~sys_names.isin(INTRA_DATASET_SYS_NAME) | sys_names.isin(explicit_names)
        else:
            name_mask = sys_names.isin(names_uncertainties)

        return self.systype_table[treatment_mask & name_mask].index

    def rescale_systematics(self, treatment_names, names_uncertainties, sys_rescaling_factor):
        """
        Rescale the columns of ``systematic_errors`` selected by
        ``treatment_names`` and ``names_uncertainties`` and return the
        rescaled table. The table stored on the instance is not modified,
        the rescaling is applied to a copy.

        Parameters
        ----------
        treatment_names : list
            list of the names of the treatments that should be rescaled
            possible values are: MULT, ADD

        names_uncertainties : list
            list of the names of the uncertainties that should be rescaled
            possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL
            (see ``select_systype_table_indices`` for the meaning of SPECIAL)

        sys_rescaling_factor : float
            factor by which the systematics should be rescaled

        Returns
        -------
        pd.DataFrame
            copy of ``systematic_errors`` with the selected columns multiplied
            by ``sys_rescaling_factor``
        """
        sys_table = self.systematic_errors.copy()

        # select the rows of the systype_table (i.e. the systematics) to rescale
        systype_idx = self.select_systype_table_indices(
            treatment_names=treatment_names, names_uncertainties=names_uncertainties
        )

        # The systype index is shifted by one to obtain column positions.
        # NOTE(review): this assumes that the systype_table index starts at 1
        # and that the columns of systematic_errors are laid out in that same
        # order - confirm against CommonData.systematic_errors.
        sys_table.iloc[:, systype_idx - 1] *= sys_rescaling_factor

        return sys_table

    def process_commondata(
        self, treatment_names, names_uncertainties, sys_rescaling_factor, inconsistent_datasets
    ):
        """
        Rescale the systematics of this commondata if its ``setname`` is
        within ``inconsistent_datasets``, otherwise leave it untouched.

        Note that no copy is made: the rescaled table is assigned to
        ``systematic_errors`` of this very instance, which is then returned.
        Since the rescaling starts from the current ``systematic_errors``,
        calling this method more than once compounds the rescaling.

        Parameters
        ----------
        treatment_names : list
            list of the names of the treatments that should be rescaled
            possible values are: MULT, ADD

        names_uncertainties : list
            list of the names of the uncertainties that should be rescaled
            possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL
            (see ``select_systype_table_indices`` for the meaning of SPECIAL)

        sys_rescaling_factor : float, int
            factor by which the selected systematics are multiplied

        inconsistent_datasets : list
            list of the datasets for which an inconsistency should be introduced

        Returns
        -------
        validphys.inconsistent_ct.InconsistentCommonData
            this same instance
        """
        if self.setname not in inconsistent_datasets:
            return self

        # needs the setter to allow assignment to systematic_errors
        self.systematic_errors = self.rescale_systematics(
            treatment_names, names_uncertainties, sys_rescaling_factor
        )
        return self

    def export_uncertainties(self, buffer):
        """
        Same as the export_uncertainties method of the CommonData class.
        The only difference is that systematic_errors is now a property of the
        class and not a method.

        Parameters
        ----------
        buffer :
            stream passed to ``yaml_safe.dump``, to which the uncertainties
            (``definitions`` and ``bins``) are written
        """
        definitions = {}
        for idx, row in self.systype_table.iterrows():
            # systematics named SKIP are not exported
            if row["name"] != "SKIP":
                definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]}

        # Order the definitions by treatment as ADD, MULT
        # TODO: make it so that it corresponds to the original order exactly
        sorted_definitions = dict(sorted(definitions.items(), key=lambda item: item[1]["treatment"]))

        bins = []
        for idx, row in self.systematic_errors.iterrows():
            tmp = {"stat": float(self.stat_errors[idx])}
            # The values of the row are paired with the definitions by position.
            # NOTE(review): this relies on the columns of systematic_errors
            # following the same ADD-then-MULT order, without the SKIP
            # entries - confirm against CommonData.
            for key_name, val in zip(sorted_definitions, row):
                tmp[key_name] = float(val)

            bins.append(tmp)

        # added after the loop above so that "stat" is not paired with a systematic
        sorted_definitions["stat"] = {
            "description": "Uncorrelated statistical uncertainties",
            "treatment": "ADD",
            "type": "UNCORR",
        }
        ret = {"definitions": sorted_definitions, "bins": bins}
        yaml_safe.dump(ret, buffer)