Source code for validphys.convolution

""" This module implements tools for computing convolutions between PDFs and
theory grids, which yield observables.

The high level :py:func:`predictions` function can be used to extract theory
predictions for experimentally measured quantities::

    import numpy as np
    from validphys.api import API
    from validphys.convolution import predictions


    inp = {
        'fit': '181023-001-sc',
        'use_cuts': 'internal',
        'theoryid': 162,
        'pdf': 'NNPDF40_nnlo_lowprecision',
        'dataset_inputs': {'from_': 'fit'}
    }


    all_datasets = API.data(**inp).datasets

    pdf = API.pdf(**inp)


    all_preds = [predictions(ds, pdf) for ds in all_datasets]




Some variants such as :py:func:`central_predictions` and
:py:func:`linear_predictions` are useful for more specialized tasks.
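
For instance, a minimal sketch using :py:func:`central_predictions` on the same
objects built above (the variable name ``central_preds`` is illustrative)::

    from validphys.convolution import central_predictions

    central_preds = [central_predictions(ds, pdf) for ds in all_datasets]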

These functions work with :py:class:`validphys.core.DatasetSpec` objects,
which makes it possible to account for information on COMPOUND predictions and
cuts. A lower level interface, operating on
:py:class:`validphys.coredata.FKTableData` objects, is also available.
"""

import functools
import operator

import numpy as np
import pandas as pd

from validphys.pdfbases import evolution

FK_FLAVOURS = evolution.to_known_elements(
    [
        "photon",
        "singlet",
        "g",
        "V",
        "V3",
        "V8",
        "V15",
        "V24",
        "V35",
        "T3",
        "T8",
        "T15",
        "T24",
        "T35",
    ]
)

NFK = len(FK_FLAVOURS)


def _asy(a, b):
    return (a - b) / (a + b)


def _smn(a, b, c, d):
    return (a + b) / (c + d)


def _com(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t):
    return (a + b + c + d + e + f + g + h + i + j) / (k + l + m + n + o + p + q + r + s + t)


def _smt(a, b, c, d, e, f, g, h, i, j):
    return a + b + c + d + e + f + g + h + i + j


def _id(a):
    return a


def _subtract_abs(a, b):
    """An operation that subtracts from an observable the absolute value of another
    observable. An example of such an operation is the enforcing of the positivity
    boundary conditions in polarized fits of gluon distributions.

    Parameters:
    -----------
    a: np.ndarray
        Array-like object. Ex., unpolarized result.
    b: np.ndarray
        Array-like object. Ex., polarized result.

    Returns:
    --------
    np.ndarray:
        returns the result of `a - abs(b)`
    """
    return a - abs(b)


def _subtract_abspair(a, b, c, d):
    """An operation that subtracts from two observables the absolute value of another
    two observables. An example of such an operation is the enforcing of the positivity
    boundary conditions in polarized fits of quark distributions.

    Parameters:
    -----------
    a: np.ndarray
        Array-like object. Ex., unpolarized result.
    b: np.ndarray
        Array-like object. Ex., unpolarized result.
    c: np.ndarray
        Array-like object. Ex., unpolarized result.
    d: np.ndarray
        Array-like object. Ex., unpolarized result.

    Returns:
    --------
    np.ndarray:
        returns the result of `a + b - abs(c + d)`
    """
    return a + b - abs(c + d)


OP = {
    "RATIO": operator.truediv,
    "ASY": _asy,
    "ADD": operator.add,
    "SMN": _smn,
    "COM": _com,
    "SMT": _smt,
    "NULL": _id,
    "SUBTRACT_ABS": _subtract_abs,
    "SUBTRACT_ABSPAIR": _subtract_abspair,
}
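
# A small sketch of how these operations act on per-FKTable predictions
# (hypothetical arrays; the actual dispatch happens in ``_predictions`` below):
#
#     >>> num = np.array([1.0, 2.0])
#     >>> den = np.array([4.0, 8.0])
#     >>> OP["RATIO"](num, den)
#     array([0.25, 0.25])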


class PredictionsRequireCutsError(Exception):
    pass


def _predictions(dataset, pdf, fkfunc):
    """Combine data on all the FKTables in the database according to the
    reduction operation defined therein. Dispatch the kind of predictions (for
    all replicas, central, etc) according to the provided ``fkfunc``, which
    should have the same interface as e.g. ``fk_predictions``.
    """
    opfunc = OP[dataset.op]
    if dataset.cuts is None:
        raise PredictionsRequireCutsError(
            "FKTables do not always generate predictions for some datapoints "
            "which are usually cut. Loading predictions without cuts can "
            "therefore produce predictions whose shape doesn't match the uncut "
            "commondata and is not supported."
        )
    cuts = dataset.cuts
    all_predictions = []
    for fk in dataset.fkspecs:
        fk_w_cuts = fk.load_with_cuts(cuts)
        all_predictions.append(fkfunc(fk_w_cuts, pdf))
    # Old fktables repeated values to make DEN and NUM sizes match in RATIO
    # operations. Pineappl tables instead contain just the one value used.
    # The code below works for both situations while keeping `true_div` as the
    # operation.
    if dataset.op == "RATIO":
        all_predictions[-1] = all_predictions[-1].values
    return opfunc(*all_predictions)


def predictions(dataset, pdf):
    """Compute theory predictions for a given PDF and dataset. Information
    regarding the dataset, on cuts, CFactors and combinations of FKTables, is
    taken into account to construct the predictions.

    The result should be comparable to experimental predictions implemented in
    CommonData.

    Parameters
    ----------
    dataset : validphys.core.DatasetSpec
        The dataset containing information on the partonic cross section.
    pdf : validphys.core.PDF
        The PDF set to use for the convolutions.

    Returns
    -------
    df : pandas.DataFrame
        A dataframe corresponding to the hadronic prediction for each data
        point for the PDF members. The index of the dataframe corresponds to
        the selected data points, based on the dataset :ref:`cuts <filters>`.
        The columns correspond to the selected PDF members in the LHAPDF set.

    Examples
    --------
    Obtain descriptive statistics over PDF replicas for each of the three
    points in the ATLAS ttbar dataset:

    >>> from validphys.loader import Loader
    >>> l = Loader()
    >>> ds = l.check_dataset('ATLASTTBARTOT', theoryid=53)
    >>> from validphys.convolution import predictions
    >>> pdf = l.check_pdf('NNPDF31_nnlo_as_0118')
    >>> preds = predictions(ds, pdf)
    >>> preds.T.describe()
    data            0           1           2
    count  100.000000  100.000000  100.000000
    mean   161.271292  231.500367  767.816844
    std      2.227304    2.883497    7.327617
    min    156.638526  225.283254  750.850250
    25%    159.652216  229.486793  762.773527
    50%    161.066965  231.281248  767.619249
    75%    162.620554  233.306836  772.390286
    max    168.390840  240.287549  786.549380
    """
    return _predictions(dataset, pdf, fk_predictions)


def central_predictions(dataset, pdf):
    """Same as :py:func:`predictions` but computing the predictions for the
    central member of the PDF set only. For Monte Carlo PDFs, this is a faster
    alternative to computing the central predictions as the average of the
    replica predictions (although a small approximation is involved in the
    case of hadronic predictions).
    """
    return _predictions(dataset, pdf, central_fk_predictions)


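# A minimal usage sketch (reusing hypothetical ``ds`` and ``pdf`` objects as in
# the :py:func:`predictions` example above; exact agreement is not expected for
# hadronic observables):
#
#     >>> central = central_predictions(ds, pdf)
#     >>> replica_mean = predictions(ds, pdf).mean(axis=1)
#     >>> (central.squeeze() - replica_mean).abs().max()  # expected to be small

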
def linear_predictions(dataset, pdf):
    """Same as :py:func:`predictions` but computing *linearized* predictions.
    These are the same as ``predictions`` for DIS, but for hadronic
    observables they are truncated to the terms that are linear in the
    difference between each member and the central value.

    This is generally a very good approximation, in that it yields differences
    that are much smaller than the PDF uncertainty.
    """
    return _predictions(dataset, pdf, linear_fk_predictions)


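# Similarly, a sketch comparing the linearized and exact predictions (same
# hypothetical ``ds`` and ``pdf`` as above); for hadronic data the difference
# should be much smaller than the spread over replicas:
#
#     >>> full = predictions(ds, pdf)
#     >>> lin = linear_predictions(ds, pdf)
#     >>> (lin - full).abs().max().max(), full.std(axis=1).max()

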
def fk_predictions(loaded_fk, pdf):
    """Low level function to compute predictions from an FKTable.

    Parameters
    ----------
    loaded_fk : validphys.coredata.FKTableData
        The FKTable corresponding to the partonic cross section.
    pdf : validphys.core.PDF
        The PDF set to use for the convolutions.

    Returns
    -------
    df : pandas.DataFrame
        A dataframe corresponding to the hadronic prediction for each data
        point for the PDF members. The index of the dataframe corresponds to
        the selected data points (use
        :py:meth:`validphys.coredata.FKTableData.with_cuts` to filter out
        points). The columns correspond to the selected PDF members in the
        LHAPDF set.

    Notes
    -----
    This function operates on a single FKTable, while the prediction for an
    experimental quantity generally involves several. Use
    :py:func:`predictions` to compute those.

    Examples
    --------
    >>> from validphys.loader import Loader
    >>> from validphys.convolution import fk_predictions
    >>> from validphys.fkparser import load_fktable
    >>> l = Loader()
    >>> pdf = l.check_pdf('NNPDF31_nnlo_as_0118')
    >>> ds = l.check_dataset('ATLASTTBARTOT', theoryid=53, cfac=('QCD',))
    >>> table = load_fktable(ds.fkspecs[0])
    >>> fk_predictions(table, pdf)
                   1           2           3           4   ...          97          98          99         100
    data                                                    ...
    0     176.688118  170.172930  172.460771  173.792321   ...  179.504636  172.343792  168.372508  169.927820
    1     252.682923  244.507916  247.840249  249.541798   ...  256.410844  247.805180  242.246438  244.415529
    2     828.076008  813.452551  824.581569  828.213508   ...  838.707211  826.056388  810.310109  816.824167
    """
    if loaded_fk.hadronic:
        return hadron_predictions(loaded_fk, pdf)
    else:
        return dis_predictions(loaded_fk, pdf)


def central_fk_predictions(loaded_fk, pdf):
    """Same as :py:func:`fk_predictions`, but computing predictions for the
    central PDF member only."""
    if loaded_fk.hadronic:
        return central_hadron_predictions(loaded_fk, pdf)
    else:
        return central_dis_predictions(loaded_fk, pdf)


def linear_fk_predictions(loaded_fk, pdf):
    """Same as :py:func:`predictions` for DIS, but compute linearized
    predictions for hadronic data, using :py:func:`linear_hadron_predictions`.
    """
    if loaded_fk.hadronic:
        return linear_hadron_predictions(loaded_fk, pdf)
    else:
        return dis_predictions(loaded_fk, pdf)


def _gv_hadron_predictions(loaded_fk, gv1func, gv2func=None):
    """Compute hadronic convolutions between the loaded FKTable and the PDF
    evaluation functions `gv1func` and `gv2func`. These must have the same
    interface as :py:meth:`validphys.pdfbases.evolution.grid_values`, but
    without the PDF argument. If `gv2func` is not given, then `gv1func` will
    be used for the second PDF, with the grid being evaluated only once.
    """
    xgrid = loaded_fk.xgrid
    Q = loaded_fk.Q0
    sigma = loaded_fk.sigma

    # Generate grid values for all flavours in the evolution basis, in the
    # expected order.
    #
    # Squeeze to remove the dimension over Q.
    gv1 = gv1func(qmat=[Q], vmat=FK_FLAVOURS, xmat=xgrid).squeeze(-1)
    if gv2func is not None:
        gv2 = gv2func(qmat=[Q], vmat=FK_FLAVOURS, xmat=xgrid).squeeze(-1)
    else:
        gv2 = gv1

    # TODO: Ideally we would return before computing the grid
    if sigma.empty:
        return pd.DataFrame(columns=range(gv1.shape[0]))

    # The hadronic FK table columns are indices into the NFK*NFK table of
    # possible flavour combinations of the two PDFs, with the convention of
    # looping first over the first index and then over the second: if the
    # flavour index of the first PDF is ``i`` and that of the second is ``j``,
    # then the column value in the FKTable is ``i*NFK + j``. This can easily
    # be inverted using ``np.indices``, which is used here to map the column
    # index to i and j.
    fm = sigma.columns
    all_fl_indices_1, all_fl_indices_2 = np.indices((NFK, NFK))
    # The flavour indices of the first and second PDF for each combination
    # (column) are obtained by using the columns to index into the flattened
    # index arrays.
    fl1 = all_fl_indices_1.ravel()[fm]
    fl2 = all_fl_indices_2.ravel()[fm]

    # Once we have the flavours, shape the PDF grids as appropriate for the
    # convolution below: we are left with two tensors of shape
    # ``nmembers * len(sigma.columns) * nx`` such that the pairs of flavours
    # of the two PDFs correspond to the combinations encoded in the FKTable.
    expanded_gv1 = gv1[:, fl1, :]
    expanded_gv2 = gv2[:, fl2, :]

    # Create a luminosity tensor holding the value f1(x1)*f2(x2) for all
    # possible x1-x2 combinations (member, flavour combination, x1, x2)
    luminosity = np.einsum("ijk, ijl->ijkl", expanded_gv1, expanded_gv2)

    def appl(df):
        # x1 and x2 are encoded as the first and second index levels.
        xx1 = df.index.get_level_values(1)
        xx2 = df.index.get_level_values(2)
        # Take the active combinations from the luminosity tensor
        partial_lumi = luminosity[..., xx1, xx2]
        return pd.Series(np.einsum("ijk,kj->i", partial_lumi, df.values))

    return sigma.groupby(level=0).apply(appl)


def _gv_dis_predictions(loaded_fk, gvfunc):
    xgrid = loaded_fk.xgrid
    Q = loaded_fk.Q0
    sigma = loaded_fk.sigma
    # The column indexes are indices into the FK_FLAVOURS list above.
    fm = sigma.columns
    # Squeeze to remove the dimension over Q.
    gv = gvfunc(qmat=[Q], vmat=FK_FLAVOURS[fm], xmat=xgrid).squeeze(-1)

    # TODO: Ideally we would return before computing the grid
    if sigma.empty:
        return pd.DataFrame(columns=range(gv.shape[0]))

    def appl(df):
        # x is encoded as the first index level.
        xind = df.index.get_level_values(1)
        return pd.Series(np.einsum("ijk,kj->i", gv[:, :, xind], df.values))

    return sigma.groupby(level=0).apply(appl)


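# The flavour-pair decoding used in ``_gv_hadron_predictions`` above, sketched
# on its own with hypothetical column values (with NFK = 14 flavours, column
# ``k`` maps to ``i = k // NFK`` and ``j = k % NFK``):
#
#     >>> cols = np.array([0, 15, 27])
#     >>> i, j = np.indices((NFK, NFK))
#     >>> i.ravel()[cols]
#     array([0, 1, 1])
#     >>> j.ravel()[cols]
#     array([ 0,  1, 13])

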
def hadron_predictions(loaded_fk, pdf):
    """Implementation of :py:func:`fk_predictions` for hadronic observables."""
    gv = functools.partial(evolution.grid_values, pdf=pdf)
    res = _gv_hadron_predictions(loaded_fk, gv)
    res.columns = range(pdf.get_members())
    return res


def central_hadron_predictions(loaded_fk, pdf):
    """Implementation of :py:func:`central_fk_predictions` for hadronic
    observables."""
    gv = functools.partial(evolution.central_grid_values, pdf=pdf)
    return _gv_hadron_predictions(loaded_fk, gv)


def linear_hadron_predictions(loaded_fk, pdf):
    """Implementation of :py:func:`linear_fk_predictions` for hadronic
    observables. Specifically this computes:

        central_value ⊗ FK ⊗ (2 * replica_values - central_value)

    which is the linear expansion of the hadronic observable in the difference
    between each replica and the central value,
    ``replica_values - central_value``.
    """
    gv1 = functools.partial(evolution.central_grid_values, pdf=pdf)

    def gv2(*args, **kwargs):
        replica_values = evolution.grid_values(pdf, *args, **kwargs)
        central_value = evolution.central_grid_values(pdf, *args, **kwargs)
        return 2 * replica_values - central_value

    res = _gv_hadron_predictions(loaded_fk, gv1, gv2)
    res.columns = range(pdf.get_members())
    return res


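# A sketch of the algebra behind ``linear_hadron_predictions``, under the
# assumption that the two cross terms can be identified (both convolution legs
# carry the same PDF). Writing f = c + d, with c the central member and d the
# (small) difference:
#
#     f ⊗ FK ⊗ f = c ⊗ FK ⊗ c + c ⊗ FK ⊗ d + d ⊗ FK ⊗ c + d ⊗ FK ⊗ d
#                ≈ c ⊗ FK ⊗ c + 2 c ⊗ FK ⊗ d
#                = c ⊗ FK ⊗ (2 f - c)
#
# which is the combination passed to ``_gv_hadron_predictions`` above.

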
def dis_predictions(loaded_fk, pdf):
    """Implementation of :py:func:`fk_predictions` for DIS observables."""
    gv = functools.partial(evolution.grid_values, pdf=pdf)
    res = _gv_dis_predictions(loaded_fk, gv)
    res.columns = range(pdf.get_members())
    return res


def central_dis_predictions(loaded_fk, pdf):
    """Implementation of :py:func:`central_fk_predictions` for DIS
    observables."""
    gv = functools.partial(evolution.central_grid_values, pdf=pdf)
    return _gv_dis_predictions(loaded_fk, gv)