"""
This module collects functions to evolve an arbitrary PDF from an initial scale to the scales
defined by the loaded EKO.
It also contains functions specialized for NNPDF / n3fit.
"""
from collections import defaultdict
import dataclasses
import json
import logging
import pathlib
import shutil
import sys
import tempfile
from ekobox import apply, genpdf, info_file
import numpy as np
import eko
from eko import basis_rotation
from eko.interpolation import XGrid
from eko.io import manipulate
from validphys.pdfbases import PIDS_DICT
from validphys.utils import yaml_safe
_logger = logging.getLogger(__name__)
LOG_FILE = "evolven3fit.log"
LOGGING_SETTINGS = {
"formatter": logging.Formatter("%(asctime)s %(name)s/%(levelname)s: %(message)s"),
"level": logging.DEBUG,
}
LABEL_TO_PIDS = {v: k for k, v in PIDS_DICT.items()}
@dataclasses.dataclass
class ExportGrid:
"""
Holds information about the PDF at a fixed scale.
q20: float
Value of Q2 at which the PDF is evaluated
xgrid: np.ndarray
The points in x at which the PDF is evaluated
pdfgrid: np.ndarray
A list of the x*PDF values for all flavours for every point in x
shape (len(xgrid), len(labels))
pids: list[int]
A list of the flavours contained in each element of the pdfgrid, by PID.
labels: list[str]
A list of the flavours contained in each element of the pdfgrid, by label. Not necessary if pids is given.
replica: int
Index of the corresponding Monte Carlo replica or member
hessian: bool
Whether the grid corresponds to a member of a hessian fit (default: False)
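Example
-------
Minimal illustrative construction (the numbers are placeholders, not a physical PDF):

>>> import numpy as np
>>> xgrid = np.array([1e-3, 1e-2, 1e-1, 1.0])  # toy x nodes
>>> pdfgrid = np.ones((len(xgrid), len(PIDS_DICT)))  # placeholder x*f(x) values
>>> grid = ExportGrid(q20=1.65**2, xgrid=xgrid, pdfgrid=pdfgrid, pids=list(PIDS_DICT), replica=1)
>>> grid.pdfvalues.shape == (len(PIDS_DICT), len(xgrid))
True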
"""
q20: float
xgrid: np.ndarray
pdfgrid: np.ndarray
labels: list[str] = None
pids: list[int] = None
replica: int = None
hessian: bool = False
def __post_init__(self):
"""Convert possible lists to arrays"""
self.pdfgrid = np.array(self.pdfgrid)
self.xgrid = np.array(self.xgrid)
if self.labels is None and self.pids is None:
raise ValueError("At least one of `labels=`, `pids=` must be given")
# Fill the other
if self.pids is None:
self.pids = [LABEL_TO_PIDS[i] for i in self.labels]
if self.labels is None:
self.labels = [PIDS_DICT[i] for i in self.pids]
@property
def pdfvalues(self):
"""Return the PDF, i.e., pdfgrid / xgrid,
with a shape (flavours, xgrid)
"""
return self.pdfgrid.T / self.xgrid
def evolve_exportgrid(eko_path: pathlib.Path, exportgrids: list[ExportGrid]):
"""Takes the path to an EKO and a list of exportgrids,
returns a tuple with an info file and the
evolved exportgrid as a dictionary of the form:
{
(Q_1^2, nf1): (replica, flavours, x),
(Q_2^2, nf1): (replica, flavours, x),
...
(Q_3^2, nf2): (replica, flavours, x),
}
with the output grouped by nf and sorted in ascending order by Q2
Parameters
----------
eko_path: pathlib.Path
Path to the evolution eko
exportgrids: list[ExportGrid]
List of ExportGrid objects to be evolved
Returns
-------
info_file: dict
dict-like object with the info file information, as built by ``ekobox.info_file``
evolved_replicas: dict
a dictionary containing all evolved PDFs.
The format of the output is
{ (q2, flavour number): np.ndarray(replica, flavours, x) }
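Example
-------
Sketch of how the output could be consumed; ``eko.tar`` and ``exportgrids`` are
assumed to exist (placeholder names):

>>> info, evolved = evolve_exportgrid(pathlib.Path("eko.tar"), exportgrids)
>>> for (q2, nf), grid in evolved.items():
...     print(nf, q2, grid.shape)  # grid shape: (replica, flavours, x)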
"""
# Check that all exportgrid objects share the same Q0, the same x nodes and the same error type (hessian or MC)
ref = exportgrids[0]
hessian_fit = ref.hessian
for egrid in exportgrids:
assert egrid.q20 == ref.q20, "Different values of q0 found among the exportgrids"
np.testing.assert_allclose(
ref.xgrid, egrid.xgrid, err_msg="ExportGrids are not all evaluated at the same x nodes"
)
assert (
hessian_fit == egrid.hessian
), "Trying to evolve hessian and non-hessian fit at the same time"
# Read the EKO and the operator and theory cards
eko_op = eko.EKO.read(eko_path)
theory = eko_op.theory_card
op = eko_op.operator_card
assert ref.q20 == op.mu20, f"The EKO can only evolve from {op.mu20}, PDF asked for {ref.q20}"
_logger.debug(f"Theory card: {json.dumps(theory.raw)}")
_logger.debug(f"Operator card: {json.dumps(op.raw)}")
# Check whether the xgrid of the exportgrids matches the EKO
# if not, construct a copy in memory of the EKO with the xgrid rotated
# this copy is constructed by replacing the internal inventory of operators
eko_original_xgrid = eko_op.xgrid
x_grid = ref.xgrid
if XGrid(x_grid) != eko_original_xgrid:
new_xgrid = XGrid(x_grid)
new_metadata = dataclasses.replace(eko_op.metadata, xgrid=new_xgrid)
new_operators = {}
for target_key in eko_op.operators:
elem = eko_op[target_key.ep]
if eko_op.metadata.version == "0.13.4":
# For eko 0.13.4 xgrid is the internal interpolation so we need to check
# whether the rotation is truly needed
# <in practice> this means checking whether the operator shape matches the grid
oplen = elem.operator.shape[-1]
if oplen != len(eko_original_xgrid):
# The operator and its xgrid have different shape
# either prepare an identity, or this EKO is not supported
if oplen != len(x_grid):
raise ValueError(
f"The operator at {eko_path} is not usable, version not supported"
)
eko_original_xgrid = XGrid(x_grid)
new_operators[target_key] = manipulate.xgrid_reshape(
elem,
eko_original_xgrid,
op.configs.interpolation_polynomial_degree,
targetgrid=XGrid(x_grid),
inputgrid=XGrid(x_grid),
)
new_inventory = dataclasses.replace(eko_op.operators, cache=new_operators)
eko_op = dataclasses.replace(eko_op, metadata=new_metadata, operators=new_inventory)
all_replicas = []
for exportgrid in exportgrids:
pid_to_idx = {pid: idx for idx, pid in enumerate(exportgrid.pids)}
reindexer = [pid_to_idx[pid] for pid in basis_rotation.flavor_basis_pids]
all_replicas.append(exportgrid.pdfvalues[reindexer])
# output is a dictionary {(Q2, nf): (replica, flavour, x)}
all_evolved, _ = apply.apply_grids(eko_op, np.array(all_replicas))
# sort the output in terms of (Q2, nf) but grouped by nf
sorted_evolved = dict(sorted(all_evolved.items(), key=lambda item: (item[0][1], item[0][0])))
info = info_file.build(theory, op, 1, info_update={})
info["NumMembers"] = "REPLACE_NREP"
if hessian_fit:
info["ErrorType"] = "hessian"
else:
info["ErrorType"] = "replicas"
info["XMin"] = float(x_grid[0])
info["XMax"] = float(x_grid[-1])
info["Flavors"] = basis_rotation.flavor_basis_pids
info.setdefault("NumFlavors", 5)
return info, sorted_evolved
def evolve_exportgrids_into_lhapdf(
eko_path: pathlib.Path,
exportgrids: list[ExportGrid],
output_files: list[pathlib.Path],
info_file: pathlib.Path,
finalize: bool = False,
):
"""
Exportgrid evolution function.
This function takes the path to an ``eko.tar`` and a list of ``ExportGrid`` objects
and generates the corresponding ``.dat`` LHAPDF files as given by the ``output_files`` list.
Parameters
----------
eko_path: pathlib.Path
Path to the evolution eko
exportgrids: list[ExportGrid]
list of the PDF grids to evolve, the settings must match those of the EKO
output_files: list[pathlib.Path]
list of .dat files where the evolved fit will be written, e.g., ["replica_0.dat", "replica_1.dat"]
the list must be of the same size as exportgrids
info_file: pathlib.Path
path to the info file
finalize: bool
If True, try to finalize the info file, otherwise keep placeholders to be filled at a later step
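Example
-------
Hypothetical invocation writing two members of a fit (all paths are placeholders):

>>> evolve_exportgrids_into_lhapdf(
...     pathlib.Path("eko.tar"),  # placeholder EKO path
...     exportgrids,  # list of two ExportGrid objects
...     [pathlib.Path("replica_1.dat"), pathlib.Path("replica_2.dat")],
...     pathlib.Path("MYFIT.info"),
...     finalize=True,
... )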
"""
if len(exportgrids) != len(output_files):
raise ValueError("The length of output_files and exportgrids must be equal")
# Make sure the parent directory of every output_file either exists or can be created
for f in output_files:
pathlib.Path(f).parent.mkdir(exist_ok=True, parents=True)
pathlib.Path(info_file).parent.mkdir(exist_ok=True, parents=True)
# all evolved is a dictionary {(Q2, nf): (replica, flavour, x)}
# ordered first by nf and then by Q2 in ascending order
info, all_evolved = evolve_exportgrid(eko_path, exportgrids)
# The ``dump`` functions from eko's genpdf are very opinionated regarding the output folder of the files
# therefore the files are first written to a temporary directory and then moved to their final destination
temp_dir = tempfile.TemporaryDirectory()
temp_path = pathlib.Path(temp_dir.name)
if finalize:
info["NumMembers"] = len(exportgrids)
genpdf.export.dump_info(temp_path, info)
temp_info = temp_path / f"{temp_path.stem}.info"
shutil.move(temp_info, info_file)
# Dump LHAPDF files as .dat files in blocks of nf
targetgrid = exportgrids[0].xgrid.tolist()
q2block_per_nf = defaultdict(list)
for q2, nf in all_evolved.keys():
q2block_per_nf[nf].append(q2)
for enum, (exportgrid, output_file) in enumerate(zip(exportgrids, output_files)):
replica_idx = exportgrid.replica
if replica_idx is None and exportgrid.hessian:
replica_idx = enum
blocks = []
for nf, q2grid in q2block_per_nf.items():
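# ``genpdf.generate_block`` evaluates ``pdf_xq2`` for every (pid, x, Q2) combination
# of the block; LHAPDF grids store x*f(x), hence the multiplication by x below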
def pdf_xq2(pid, x, Q2):
x_idx = targetgrid.index(x)
pid_idx = info["Flavors"].index(pid)
return x * all_evolved[(Q2, nf)][enum][pid_idx][x_idx]
block = genpdf.generate_block(
pdf_xq2, xgrid=targetgrid, sorted_q2grid=q2grid, pids=info["Flavors"]
)
blocks.append(block)
dat_path = dump_evolved_replica(blocks, temp_path, replica_idx, exportgrid.hessian)
if not dat_path.exists():
raise FileNotFoundError(
"The expected {dat_path} file was not found after dumping the blocks"
)
shutil.move(dat_path, output_file)
temp_dir.cleanup()
def evolve_fit(fit_folder, force, eko_path, hessian_fit=False):
"""
Evolves all the fitted replicas in ``fit_folder/nnfit``
Parameters
----------
fit_folder: str or pathlib.Path
path to the folder containing the fit
force: bool
whether to force the evolution to be done again
eko_path: str or pathlib.Path
path where the eko is stored
hessian_fit: bool
whether the fit is hessian
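Example
-------
Illustrative invocation (fit folder and EKO path are placeholders):

>>> evolve_fit("path/to/MYFIT", force=False, eko_path="path/to/eko.tar")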
"""
fit_folder = pathlib.Path(fit_folder)
log_file = fit_folder / LOG_FILE
if log_file.exists():
if force:
log_file.unlink()
else:
raise FileExistsError(
f"Log file already exists: {log_file}. Has evolven3fit already been run?"
)
log_file = logging.FileHandler(log_file)
stdout_log = logging.StreamHandler(sys.stdout)
for log in [log_file, stdout_log]:
log.setFormatter(LOGGING_SETTINGS["formatter"])
# The log file will get everything
log_file.setLevel(LOGGING_SETTINGS["level"])
# While the terminal only up to info
stdout_log.setLevel(logging.INFO)
for logger in (_logger, logging.getLogger("eko")):
logger.handlers = []
logger.setLevel(LOGGING_SETTINGS["level"])
logger.addHandler(log_file)
logger.addHandler(stdout_log)
output_files = []
exportgrids = []
for exportgrid_file in fit_folder.glob(f"nnfit/replica_*/{fit_folder.name}.exportgrid"):
data = yaml_safe.load(exportgrid_file.read_text(encoding="UTF-8"))
exportgrids.append(ExportGrid(**data, hessian=hessian_fit))
output_files.append(exportgrid_file.with_suffix(".dat"))
info_path = fit_folder / "nnfit" / f"{fit_folder.name}.info"
evolve_exportgrids_into_lhapdf(eko_path, exportgrids, output_files, info_path)
def dump_evolved_replica(evolved_blocks, dump_folder, replica_num, hessian_fit=False):
"""
Dump the evolved replica given by evolved_blocks as
dump_folder / f"{dump_folder.stem}_{replica_num:04d}.dat"
Parameters
----------
evolved_blocks: list(numpy.ndarray)
list of blocks of an evolved PDF
dump_folder: pathlib.Path
folder where the evolved replica is written
replica_num: int
replica number
hessian_fit: bool
whether the fit is hessian
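Example
-------
Illustrative call; ``blocks`` is assumed to be a list of blocks produced by
``genpdf.generate_block``:

>>> dat_path = dump_evolved_replica(blocks, pathlib.Path("MYFIT"), 1)  # -> MYFIT/MYFIT_0001.dat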
"""
# create folder to dump the evolved replica if it does not exist
dump_folder.mkdir(exist_ok=True, parents=True)
if hessian_fit:
to_write_in_head = f"PdfType: error\n"
else:
to_write_in_head = f"PdfType: replica\nFromMCReplica: {replica_num}\n"
genpdf.export.dump_blocks(dump_folder, replica_num, evolved_blocks, pdf_type=to_write_in_head)
return dump_folder / f"{dump_folder.stem}_{replica_num:04d}.dat"