"""
Custom hyperopt trial object for persistent file storage
in the form of json and pickle files within the nnfit folder
"""
import json
import logging
import pickle
from hyperopt import Trials, space_eval
from validphys.hyperoptplot import HyperoptTrial
log = logging.getLogger(__name__)
# Note: the plan is to open a PR in hyperopt's main repository,
# because these features are generic and useful enough that they should be
# in hyperopt by default. For now they stay here.
def space_eval_trial(space, trial):
    """
    Evaluate the hyperopt space for one trial and return human-readable values.

    The json written by hyperopt stores every sampled parameter as a list,
    e.g. ``optimizer = [5]``, whereas we want ``optimizer = "Adam"``.
    hyperopt's ``space_eval`` performs the translation, but only once each
    item has been "unlisted"; that unlisting is all this wrapper adds.
    See https://github.com/hyperopt/hyperopt/issues/383#issuecomment-378561408

    Parameters
    ----------
    space: dict
        Dictionary containing the hyperopt space samplers passed to the
        hyperparametrizable function
    trial: dict
        Trial dictionary; the sampled parameters of this hyperopt iteration
        are read from ``trial["misc"]["vals"]``

    Returns
    -------
    dict
        Values of all the parameters in a human-readable format
    """
    sampled = trial["misc"]["vals"]
    # Keep the first entry of every non-empty list; empty entries become None
    unlisted = {name: (entry[0] if entry else None) for name, entry in sampled.items()}
    evaluated = space_eval(space, unlisted)

    if not isinstance(evaluated.get("parameters"), HyperoptTrial):
        return evaluated

    # The result embeds a trial under "parameters": replace that entry by the
    # trial's own parameters, merged on top of the remaining keys
    nested_trial = evaluated.pop("parameters")
    return dict(evaluated, **nested_trial.params)
class FileTrials(Trials):
    """
    Trials container that stores the trial results on the fly inside the
    nnfit replica folder, as ``tries.json`` and (on request) ``tries.pkl``.

    Parameters
    ----------
    replica_path: path
        Replica folder as generated by n3fit
    parameters: dict
        Dictionary of parameters on which we are doing hyperoptimization
    **kwargs
        Forwarded unchanged to the parent ``Trials`` constructor
    """

    def __init__(self, replica_path, parameters=None, **kwargs):
        # Every attribute is assigned before delegating to the parent class.
        # NOTE(review): hyperopt's Trials.__init__ appears to call refresh(),
        # which reads these attributes -- confirm before reordering.
        self._rstate = None
        self._parameters = parameters
        self._json_file = replica_path / "tries.json"
        self.pkl_file = replica_path / "tries.pkl"
        self._store_trial = False
        super().__init__(**kwargs)

    @property
    def rstate(self):
        """
        Random generator associated with these trials.

        Notes
        -----
        :func:`rstate` holds a `numpy.random.Generator`, which is what makes
        hyperopt restarts reproducible in the hyperparameter space. It can be
        passed later as the `rstate` parameter of `hyperopt.fmin`.
        """
        return self._rstate

    @rstate.setter
    def rstate(self, random_generator):
        """
        Store the random generator.

        Parameters
        ----------
        random_generator: `numpy.random.Generator`

        Example
        -------
        >>> import numpy as np
        >>> from n3fit.hyper_optimization.filetrials import FileTrials
        >>>
        >>> trials = FileTrials(replica_path_set, parameters=parameters)
        >>> trials.rstate = np.random.default_rng(42)
        """
        self._rstate = random_generator

    def refresh(self):
        """
        "Flushing" method, called at the end of every trial to save things in
        the database. It is overloaded here in order to also write every
        single trial to a json file.
        """
        super().refresh()

        # Only write the json when new_trial_docs has raised the flag
        if not self._store_trial:
            return

        log.info("Storing scan in %s", self._json_file)
        annotated = []
        for trial in self._dynamic_trials:
            # The trial dictionary itself is updated in place with the
            # human-readable values
            trial["misc"]["space_vals"] = space_eval_trial(self._parameters, trial)
            annotated.append(trial)

        # Serialize before opening the file, so that a serialization failure
        # happens before the existing file is opened for writing
        serialized = json.dumps(annotated, default=str)
        with open(self._json_file, "w") as stream:
            stream.write(serialized)

    # The two methods below only toggle the storing flag, as a simple
    # overloading trick to avoid writing to the database twice

    def new_trial_ids(self, n):
        """Disable json storing and delegate to the parent ``new_trial_ids``."""
        self._store_trial = False
        return super().new_trial_ids(n)

    def new_trial_docs(self, tids, specs, results, miscs):
        """Enable json storing and delegate to the parent ``new_trial_docs``."""
        self._store_trial = True
        return super().new_trial_docs(tids, specs, results, miscs)

    def to_pkl(self):
        """Dump this `FileTrials` object into the pickle file ``self.pkl_file``."""
        with open(self.pkl_file, "wb") as stream:
            pickle.dump(self, stream)

    @classmethod
    def from_pkl(cls, pickle_filepath):
        """
        Load and return the object stored in a pickle file.

        When a pickle file from a previous run is present, this method can be
        used to instantiate the initial `FileTrials` object of a restart.

        Raises
        ------
        FileNotFoundError
            re-raised with a more explicit message, chained to the original error
        """
        # NOTE: unpickling can execute arbitrary code; only load trusted files
        try:
            with open(pickle_filepath, "rb") as stream:
                return pickle.load(stream)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "Failed to open 'tries.pkl' pickle file for restarting. "
                f"Please ensure it is located in: {pickle_filepath}"
            ) from err