"""
This module contains the InconsistentCommonData class, which provides all the
methods needed to introduce an inconsistency within a Closure Test.
"""
import dataclasses
from typing import Any
import pandas as pd
from nnpdf_data.coredata import CommonData
from validphys.utils import yaml_safe
@dataclasses.dataclass(eq=False)
class InconsistentCommonData(CommonData):
"""
Class that inherits all of the methods of the coredata.CommonData class
and provides, in addition, the methods needed to introduce an
inconsistency within a Closure Test.
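Examples
--------
A minimal construction sketch, assuming cd is an already loaded
coredata.CommonData instance and that the fields declared below are the
only constructor arguments required:
>>> inconsistent_cd = InconsistentCommonData(
...     setname=cd.setname,
...     ndata=cd.ndata,
...     commondataproc=cd.commondataproc,
...     nkin=cd.nkin,
...     nsys=cd.nsys,
...     commondata_table=cd.commondata_table,
...     systype_table=cd.systype_table,
...     systematics_table=cd.systematics_table,
... )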
"""
setname: str
ndata: int
commondataproc: str
nkin: int
nsys: int
commondata_table: pd.DataFrame = dataclasses.field(repr=False)
systype_table: pd.DataFrame = dataclasses.field(repr=False)
systematics_table: pd.DataFrame = dataclasses.field(default=None, repr=False)
_systematic_errors: Any = dataclasses.field(default=None, init=False)
@property
def systematic_errors(self):
"""
Overrides the systematic_errors method of the CommonData class so that it
becomes a property, allowing values to be assigned to it through the setter.
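Examples
--------
A sketch, assuming inc_cd is an InconsistentCommonData instance:
>>> inc_cd.systematic_errors = inc_cd.systematic_errors * 2.0  # uses the setter
>>> modified = inc_cd.systematic_errors  # returns the assigned table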
"""
if self._systematic_errors is None:
return super().systematic_errors()
return self._systematic_errors
@systematic_errors.setter
def systematic_errors(self, value):
# Define the setter to allow assignment to systematic_errors
self._systematic_errors = value
def select_systype_table_indices(self, treatment_names, names_uncertainties):
"""
Return the indices of the systype_table rows whose treatment is in
treatment_names and whose name matches the names_uncertainties selection.
Parameters
----------
treatment_names : list
list of the names of the treatments that should be selected
possible values are: MULT, ADD
names_uncertainties : list
list of the names of the uncertainties that should be selected
possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL
SPECIAL selects systematics whose name is not one of the intra-dataset labels (i.e. not in INTRA_DATASET_SYS_NAME)
Returns
-------
systype_tab.index : pd.Index
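Examples
--------
A sketch, assuming inc_cd is an InconsistentCommonData instance with a
populated systype_table:
>>> idx = inc_cd.select_systype_table_indices(
...     treatment_names=["MULT", "ADD"],
...     names_uncertainties=["CORR", "UNCORR"],
... )
>>> # idx is a pd.Index of the (1-based) systematics entries matching the selection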
"""
# check that names_uncertainties only contains either CORR, UNCORR, THEORYCORR, THEORYUNCORR or SPECIAL
# if not raise an error
if not all(
name in ["CORR", "UNCORR", "THEORYCORR", "THEORYUNCORR", "SPECIAL"]
for name in names_uncertainties
):
raise ValueError(
"names_uncertainties should only contain either CORR, UNCORR, THEORYCORR, THEORYUNCORR or SPECIAL"
)
# if "SPECIAL", then we need to select the intra-dataset systematics
if "SPECIAL" in names_uncertainties:
# avoid circular import error
from validphys.covmats import INTRA_DATASET_SYS_NAME
# note: the | operator extends the selection so that the explicitly requested (non-SPECIAL) names are also included
systype_tab = self.systype_table[
(self.systype_table["treatment"].isin(treatment_names))
& (
~self.systype_table["name"].isin(INTRA_DATASET_SYS_NAME)
| self.systype_table["name"].isin(
[name for name in names_uncertainties if name != "SPECIAL"]
)
)
]
else:
systype_tab = self.systype_table[
(self.systype_table["treatment"].isin(treatment_names))
& (self.systype_table["name"].isin(names_uncertainties))
]
return systype_tab.index
def rescale_systematics(self, treatment_names, names_uncertainties, sys_rescaling_factor):
"""
Rescale the columns of systematic_errors that match the treatment_names
and names_uncertainties selections, and return the rescaled table.
Parameters
----------
treatment_names : list
list of the names of the treatments that should be rescaled
possible values are: MULT, ADD
names_uncertainties : list
list of the names of the uncertainties that should be rescaled
possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL
SPECIAL selects systematics whose name is not one of the intra-dataset labels (i.e. not in INTRA_DATASET_SYS_NAME)
sys_rescaling_factor : float
factor by which the systematics should be rescaled
Returns
-------
sys_table : pd.DataFrame
copy of the systematics table with the selected columns rescaled
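Examples
--------
A sketch, assuming inc_cd is an InconsistentCommonData instance:
>>> rescaled = inc_cd.rescale_systematics(
...     treatment_names=["MULT"],
...     names_uncertainties=["CORR"],
...     sys_rescaling_factor=2.0,
... )
>>> inc_cd.systematic_errors = rescaled  # store the rescaled table via the setter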
"""
sys_table = self.systematic_errors.copy()
# select the columns of the systematics_table that should be rescaled
systype_idx = self.select_systype_table_indices(
treatment_names=treatment_names, names_uncertainties=names_uncertainties
)
# rescale the columns selected by systype_idx (systype indices are 1-based, hence the shift by 1)
sys_table.iloc[:, systype_idx - 1] *= sys_rescaling_factor
return sys_table
def process_commondata(
self, treatment_names, names_uncertainties, sys_rescaling_factor, inconsistent_datasets
):
"""
Returns a commondata instance with modified systematics.
Note that if commondata.setname is not contained in the
inconsistent_datasets list, or if neither the ADD nor the
MULT treatment is selected, then the commondata object
is returned unmodified.
Parameters
----------
treatment_names : list
list of the names of the treatments that should be rescaled
possible values are: MULT, ADD
names_uncertainties : list
list of the names of the uncertainties that should be rescaled
possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL
SPECIAL selects systematics whose name is not one of the intra-dataset labels (i.e. not in INTRA_DATASET_SYS_NAME)
sys_rescaling_factor : float, int
factor by which the selected systematics are rescaled
inconsistent_datasets : list
list of the datasets for which an inconsistency should be introduced
Returns
-------
validphys.inconsistent_ct.InconsistentCommonData
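Examples
--------
A minimal sketch, assuming inc_cd is an InconsistentCommonData instance;
its own setname is reused here so that the dataset is treated as inconsistent:
>>> inc_cd = inc_cd.process_commondata(
...     treatment_names=["MULT"],
...     names_uncertainties=["CORR", "SPECIAL"],
...     sys_rescaling_factor=0.5,
...     inconsistent_datasets=[inc_cd.setname],
... )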
"""
if self.setname not in inconsistent_datasets:
return self
new_commondata = self
# needs setter to allow assignment to systematic_errors
new_commondata.systematic_errors = self.rescale_systematics(
treatment_names, names_uncertainties, sys_rescaling_factor
)
return new_commondata
def export_uncertainties(self, buffer):
"""
Same as the export_uncertainties method of the CommonData class.
The only difference is that systematic_errors is now a property of the class
and not a method.
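Examples
--------
A sketch that writes the uncertainties of an InconsistentCommonData
instance inc_cd to an in-memory buffer (any writable text stream works):
>>> from io import StringIO
>>> buf = StringIO()
>>> inc_cd.export_uncertainties(buf)
>>> yaml_text = buf.getvalue()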
"""
definitions = {}
for idx, row in self.systype_table.iterrows():
if row["name"] != "SKIP":
definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]}
# Order the definitions by treatment as ADD, MULT
# TODO: make it so that it corresponds to the original order exactly
sorted_definitions = {
k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"])
}
bins = []
for idx, row in self.systematic_errors.iterrows():
tmp = {"stat": float(self.stat_errors[idx])}
# Hope things come in the right order...
for key_name, val in zip(sorted_definitions, row):
tmp[key_name] = float(val)
bins.append(tmp)
sorted_definitions["stat"] = {
"description": "Uncorrelated statistical uncertainties",
"treatment": "ADD",
"type": "UNCORR",
}
ret = {"definitions": sorted_definitions, "bins": bins}
yaml_safe.dump(ret, buffer)