# Source code for n3fit.hyper_optimization.filetrials

"""
    Custom hyperopt trial object for persistent file storage
    in the form of json and pickle files within the nnfit folder
"""
import json
import logging
import pickle

from hyperopt import Trials, space_eval

from validphys.hyperoptplot import HyperoptTrial

log = logging.getLogger(__name__)

# Note: the plan is to open a PR in hyperopt's main repository, because
# these features are generic and useful enough that they should be part
# of hyperopt by default. For now, they stay here.



def space_eval_trial(space, trial):
    """
    Thin wrapper around hyperopt's ``space_eval`` used to attach a dictionary of
    human-readable values to the json output.
    i.e., where the standard json would say: "optimizer = [5]" we want optimizer = "Adam"

    The only thing done before delegating to ``space_eval`` is "unlisting" the
    sampled values: every entry of ``trial["misc"]["vals"]`` is replaced by its
    first element (or by ``None`` when the entry is empty).
    See also https://github.com/hyperopt/hyperopt/issues/383#issuecomment-378561408

    # Arguments:
        - `space`: the dictionary containing the hyperopt space samplers we pass
                   to the hyperparametrizable function
        - `trial`: trial dictionary; the sampled parameters of this iteration are
                   read from ``trial["misc"]["vals"]``

    # Returns:
        A dictionary containing the values of all the parameters in a human-readable format.
        If the evaluated space holds a `HyperoptTrial` under the key "parameters",
        that key is removed and the trial's ``params`` are merged into the result.
    """
    sampled = trial["misc"]["vals"]
    # Unwrap the one-element lists; empty entries are mapped to None
    unlisted = {name: (vals[0] if vals else None) for name, vals in sampled.items()}

    evaluated = space_eval(space, unlisted)

    # Nothing to expand unless the result carries a full trial under "parameters"
    if not isinstance(evaluated.get("parameters"), HyperoptTrial):
        return evaluated

    previous_trial = evaluated.pop("parameters")
    return dict(evaluated, **previous_trial.params)




class FileTrials(Trials):
    """
    Hyperopt ``Trials`` container which persists the trial results on the fly
    inside the nnfit replica folder.

    Two files are handled, both located in ``replica_path``:
    ``tries.json`` (rewritten by :meth:`refresh`) and ``tries.pkl``
    (written on demand by :meth:`to_pkl`).

    Parameters
    ----------
        replica_path: path
            Replica folder as generated by n3fit
        parameters: dict
            Dictionary of parameters on which we are doing hyperoptimization
    """

    def __init__(self, replica_path, parameters=None, **kwargs):
        # All attributes are set before calling the parent constructor so that
        # they already exist should the parent need them during initialization
        self._rstate = None
        self._parameters = parameters
        self.pkl_file = replica_path / "tries.pkl"
        self._json_file = replica_path / "tries.json"
        self._store_trial = False
        super().__init__(**kwargs)

    @property
    def rstate(self):
        """
        Random state associated to this set of trials.

        Notes:
            :func:`rstate` stores a `numpy.random.Generator` which is important to make
            hyperopt restarts reproducible in the hyperparameter space. It can
            be passed later as the `rstate` parameters of `hyperopt.fmin`.
        """
        return self._rstate

    @rstate.setter
    def rstate(self, random_generator):
        """
        Stores the given random generator as the rstate attribute.

        # Arguments:
            - `random_generator`: `numpy.random.Generator`

        Example
        --------
        >>> import numpy as np
        >>> from n3fit.hyper_optimization.filetrials import FileTrials
        >>>
        >>> trials = FileTrials(replica_path_set, parameters=parameters)
        >>> trials.rstate = np.random.default_rng(42)
        """
        self._rstate = random_generator

    def refresh(self):
        """
        This is the "flushing" method which is called at the end of every trial to
        save things in the database. We are overloading it in order to also write
        every single trial to a json file.

        The json file is only written when the internal ``_store_trial`` flag is set.
        Note that every trial is updated in place: the human-readable values are
        stored under ``trial["misc"]["space_vals"]``.
        """
        super().refresh()

        if not self._store_trial:
            return

        log.info("Storing scan in %s", self._json_file)
        trials_to_dump = []
        for trial in self._dynamic_trials:
            trial["misc"]["space_vals"] = space_eval_trial(self._parameters, trial)
            trials_to_dump.append(trial)

        # Serialize first so that the file is only touched once the dump succeeded;
        # anything json cannot handle natively is written as its str() representation
        serialized = json.dumps(trials_to_dump, default=str)
        with open(self._json_file, "w") as json_stream:
            json_stream.write(serialized)

    # The two methods below are overloaded only to toggle the ``_store_trial``
    # flag read by ``refresh``, so that we avoid writing to the database twice

    def new_trial_ids(self, n):
        """Disable the json writing and defer to the parent ``new_trial_ids``."""
        self._store_trial = False
        return super().new_trial_ids(n)

    def new_trial_docs(self, tids, specs, results, miscs):
        """Enable the json writing and defer to the parent ``new_trial_docs``."""
        self._store_trial = True
        return super().new_trial_docs(tids, specs, results, miscs)

    def to_pkl(self):
        """Dump `FileTrials` object into a pickle file (``self.pkl_file``)."""
        with open(self.pkl_file, "wb") as pkl_stream:
            pickle.dump(self, pkl_stream)

    @classmethod
    def from_pkl(cls, pickle_filepath):
        """
        Load and return an instance of `FileTrials` from a pickle file.

        If a pickle file from previous run is present this method can be used
        to instantiate an initial `FileTrials` object to restart.

        Whatever object is stored in the file is returned as it is.
        NOTE(review): unpickling can execute arbitrary code, so this should only
        be used with files coming from a trusted source.

        # Arguments:
            - `pickle_filepath`: path to the pickle file to be loaded

        # Raises:
            FileNotFoundError: if ``pickle_filepath`` cannot be found
        """
        try:
            with open(pickle_filepath, "rb") as pkl_stream:
                return pickle.load(pkl_stream)
        except FileNotFoundError as missing:
            raise FileNotFoundError(
                "Failed to open 'tries.pkl' pickle file for restarting. "
                f"Please ensure it is located in: {pickle_filepath}"
            ) from missing