Source code for validphys.replica_selector

"""
replica_selector.py

Tools for filtering replica sets based on criteria on the replicas.
"""
import logging
import re
import shutil

from reportengine.checks import check, make_argcheck
from reportengine.compat import yaml
from validphys.core import PDF
from validphys.renametools import rename_pdf
from validphys.utils import tempfile_cleaner

log = logging.getLogger(__name__)


def _fixup_new_replica(alphas_pdf: PDF, new_replica_file):
    """Prepend the alpha_s metadata of ``alphas_pdf`` to a replica file.

    The file at ``new_replica_file`` is rewritten in place: an
    ``AlphaS_MZ`` line and an ``AlphaS_Vals`` line, filled from the
    ``alphas_mz`` and ``alphas_vals`` attributes of the
    :py:class:`validphys.core.PDF` object, are placed in front of
    the existing contents, which are otherwise left untouched.
    """
    # Add the AlphaS_MZ and AlphaS_Vals keys
    header = (
        f"AlphaS_MZ: {alphas_pdf.alphas_mz}\nAlphaS_Vals: {alphas_pdf.alphas_vals}\n"
    ).encode()

    # The whole file is read first because it is reopened for writing
    # (and therefore truncated) right afterwards.
    with open(new_replica_file, 'rb') as source:
        original_contents = source.read()

    with open(new_replica_file, 'wb') as sink:
        sink.write(header + original_contents)


@make_argcheck
def _check_target_name(target_name):
    """Make sure this specifies a bare name and not some kind of path.

    ``None`` is accepted as is. Any other value must consist solely of
    one or more ``\\w`` characters, which rules out path separators
    and dots.
    """
    if target_name is not None:
        is_plain_name = re.fullmatch(r'[\w]+', target_name)
        check(
            is_plain_name,
            "`target_name` must contain alphnumeric characters and underscores only",
        )



[docs]
@_check_target_name
def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = None):
    """Action that bundles PDFs for distributing to the LHAPDF
    format. The baseline pdf is declared as the ``pdf`` key
    and the PDFs from which the replica 0s are to be added is
    declared as the ``pdfs`` list.

    The bundled PDF set is stored at ``output_path / target_name``.
    If that location already exists it is replaced, but only after the
    new bundle has been assembled successfully, so a failure while
    building does not destroy a previous bundle.

    Parameters
    ----------
    pdf: :py:class:`validphys.core.PDF`
        The baseline PDF to which the new replicas will be added
    pdfs: list of :py:class:`validphys.core.PDF`
        The list of PDFs from which replica0 will be appended
    output_path: pathlib.Path
        Directory in which the bundle is written. The temporary
        working directory is created inside it as well.
    target_name: str or None
        Optional argument specifying the name of the output PDF.
        If ``None``, then the name of the original pdf is used
        but with ``_pdfas`` appended

    Returns
    -------
    str
        The name of the bundled PDF set, i.e. ``target_name`` after
        the default has been applied.
    """
    base_pdf_path = pdf.infopath.parent
    nrep = len(pdf)

    target_name = target_name or pdf.name + '_pdfas'
    target_path = output_path / target_name

    # Member files are named after the set name. Use ``name`` explicitly,
    # exactly as done for the baseline PDF below, instead of relying on
    # the string representation of the PDF object.
    alphas_replica0s = [p.infopath.parent / f'{p.name}_0000.dat' for p in pdfs]
    new_nrep = nrep + len(alphas_replica0s)
    alphas_values = [str(p.alphas_mz) for p in pdfs]

    # We create a temporary directory to handle the manipulations inside.
    # We move the files to the new directory at the end.
    with tempfile_cleaner(
        root=output_path, exit_func=shutil.rmtree, exc=KeyboardInterrupt
    ) as tempdir:
        # Copy the base pdf into the temporary directory
        temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name)

        # Copy the alphas PDF replica0s into the new PDF, numbering them
        # right after the members of the baseline PDF
        for member, (alphas_pdf, rep) in enumerate(zip(pdfs, alphas_replica0s), start=nrep):
            to = temp_pdf / f'{pdf.name}_{member:04d}.dat'
            shutil.copy(rep, to)
            _fixup_new_replica(alphas_pdf, to)

        # Fixup the info file. The suffix is appended to the name rather
        # than set with ``with_suffix``, which would truncate a set name
        # that itself contains a dot.
        info_file = temp_pdf / f'{pdf.name}.info'

        yaml_obj = yaml.YAML()
        with open(info_file, 'r') as stream:
            info_yaml = yaml_obj.load(stream)
        info_yaml['NumMembers'] = new_nrep
        info_yaml['ErrorType'] += '+as'
        extra_desc = '; '.join(
            f"mem={i} => alphas(MZ)={val}" for i, val in enumerate(alphas_values, start=nrep)
        )
        info_yaml['SetDesc'] += f"; {extra_desc}"
        with open(info_file, 'w') as stream:
            yaml_obj.dump(info_yaml, stream)

        # Rename the base pdf to the final name
        rename_pdf(temp_pdf, pdf.name, target_name)
        # This is the pdf path after the above renaming
        # i.e new_pdf.exists() == True
        new_pdf = temp_pdf.with_name(target_name)

        # Only now that the new bundle is complete do we discard a
        # previous one sitting at the destination.
        if target_path.exists():
            log.warning(f"{target_path} already exists. Deleting contents.")
            shutil.rmtree(target_path)

        # Move the final pdf outside the temporary directory
        new_pdf = new_pdf.rename(target_path)
    log.info(f"alpha_s bundle written at {new_pdf}")
    return target_name