Source code for n3fit.io.writer

"""
    Module containing functions dedicated to the write down of the output of n3fit

    The goal is to generate the same folder/file structure as the old nnfit code
    so previously active scripts can still work.
"""

import json
import logging

import numpy as np

import n3fit
from n3fit import vpinterface
from reportengine.compat import yaml
import validphys

log = logging.getLogger(__name__)

XGRID = np.array(
    [
        1.00000000000000e-09,
        1.29708482343957e-09,
        1.68242903474257e-09,
        2.18225315420583e-09,
        2.83056741739819e-09,
        3.67148597892941e-09,
        4.76222862935315e-09,
        6.17701427376180e-09,
        8.01211109898438e-09,
        1.03923870607245e-08,
        1.34798064073805e-08,
        1.74844503691778e-08,
        2.26788118881103e-08,
        2.94163370300835e-08,
        3.81554746595878e-08,
        4.94908707232129e-08,
        6.41938295708371e-08,
        8.32647951986859e-08,
        1.08001422993829e-07,
        1.40086873081130e-07,
        1.81704331793772e-07,
        2.35685551545377e-07,
        3.05703512595323e-07,
        3.96522309841747e-07,
        5.14321257236570e-07,
        6.67115245136676e-07,
        8.65299922973143e-07,
        1.12235875241487e-06,
        1.45577995547683e-06,
        1.88824560514613e-06,
        2.44917352454946e-06,
        3.17671650028717e-06,
        4.12035415232797e-06,
        5.34425265752090e-06,
        6.93161897806315e-06,
        8.99034258238145e-06,
        1.16603030112258e-05,
        1.51228312288769e-05,
        1.96129529349212e-05,
        2.54352207134502e-05,
        3.29841683435992e-05,
        4.27707053972016e-05,
        5.54561248105849e-05,
        7.18958313632514e-05,
        9.31954227979614e-05,
        1.20782367731330e-04,
        1.56497209466554e-04,
        2.02708936328495e-04,
        2.62459799331951e-04,
        3.39645244168985e-04,
        4.39234443000422e-04,
        5.67535660104533e-04,
        7.32507615725537e-04,
        9.44112105452451e-04,
        1.21469317686978e-03,
        1.55935306118224e-03,
        1.99627451141338e-03,
        2.54691493736552e-03,
        3.23597510213126e-03,
        4.09103436509565e-03,
        5.14175977083962e-03,
        6.41865096062317e-03,
        7.95137940306351e-03,
        9.76689999624100e-03,
        1.18876139251364e-02,
        1.43298947643919e-02,
        1.71032279460271e-02,
        2.02100733925079e-02,
        2.36463971369542e-02,
        2.74026915728357e-02,
        3.14652506132444e-02,
        3.58174829282429e-02,
        4.04411060163317e-02,
        4.53171343973807e-02,
        5.04266347950069e-02,
        5.57512610084339e-02,
        6.12736019390519e-02,
        6.69773829498255e-02,
        7.28475589986517e-02,
        7.88703322292727e-02,
        8.50331197801452e-02,
        9.13244910278679e-02,
        9.77340879783772e-02,
        1.04252538208639e-01,
        1.10871366547237e-01,
        1.17582909372878e-01,
        1.24380233801599e-01,
        1.31257062945031e-01,
        1.38207707707289e-01,
        1.45227005135651e-01,
        1.52310263065985e-01,
        1.59453210652156e-01,
        1.66651954293987e-01,
        1.73902938455578e-01,
        1.81202910873333e-01,
        1.88548891679097e-01,
        1.95938145999193e-01,
        2.03368159629765e-01,
        2.10836617429103e-01,
        2.18341384106561e-01,
        2.25880487124065e-01,
        2.33452101459503e-01,
        2.41054536011681e-01,
        2.48686221452762e-01,
        2.56345699358723e-01,
        2.64031612468684e-01,
        2.71742695942783e-01,
        2.79477769504149e-01,
        2.87235730364833e-01,
        2.95015546847664e-01,
        3.02816252626866e-01,
        3.10636941519503e-01,
        3.18476762768082e-01,
        3.26334916761672e-01,
        3.34210651149156e-01,
        3.42103257303627e-01,
        3.50012067101685e-01,
        3.57936449985571e-01,
        3.65875810279643e-01,
        3.73829584735962e-01,
        3.81797240286494e-01,
        3.89778271981947e-01,
        3.97772201099286e-01,
        4.05778573402340e-01,
        4.13796957540671e-01,
        4.21826943574548e-01,
        4.29868141614175e-01,
        4.37920180563205e-01,
        4.45982706956990e-01,
        4.54055383887562e-01,
        4.62137890007651e-01,
        4.70229918607142e-01,
        4.78331176755675e-01,
        4.86441384506059e-01,
        4.94560274153348e-01,
        5.02687589545177e-01,
        5.10823085439086e-01,
        5.18966526903235e-01,
        5.27117688756998e-01,
        5.35276355048428e-01,
        5.43442318565661e-01,
        5.51615380379768e-01,
        5.59795349416641e-01,
        5.67982042055800e-01,
        5.76175281754088e-01,
        5.84374898692498e-01,
        5.92580729444440e-01,
        6.00792616663950e-01,
        6.09010408792398e-01,
        6.17233959782450e-01,
        6.25463128838069e-01,
        6.33697780169485e-01,
        6.41937782762089e-01,
        6.50183010158361e-01,
        6.58433340251944e-01,
        6.66688655093089e-01,
        6.74948840704708e-01,
        6.83213786908386e-01,
        6.91483387159697e-01,
        6.99757538392251e-01,
        7.08036140869916e-01,
        7.16319098046733e-01,
        7.24606316434025e-01,
        7.32897705474271e-01,
        7.41193177421404e-01,
        7.49492647227008e-01,
        7.57796032432224e-01,
        7.66103253064927e-01,
        7.74414231541921e-01,
        7.82728892575836e-01,
        7.91047163086478e-01,
        7.99368972116378e-01,
        8.07694250750291e-01,
        8.16022932038457e-01,
        8.24354950923382e-01,
        8.32690244169987e-01,
        8.41028750298844e-01,
        8.49370409522600e-01,
        8.57715163684985e-01,
        8.66062956202683e-01,
        8.74413732009721e-01,
        8.82767437504206e-01,
        8.91124020497459e-01,
        8.99483430165226e-01,
        9.07845617001021e-01,
        9.16210532771399e-01,
        9.24578130473112e-01,
        9.32948364292029e-01,
        9.41321189563734e-01,
        9.49696562735755e-01,
        9.58074441331298e-01,
        9.66454783914439e-01,
        9.74837550056705e-01,
        9.83222700304978e-01,
        9.91610196150662e-01,
        1.00000000000000e00,
    ]
)


[docs]class WriterWrapper: def __init__(self, replica_numbers, pdf_objects, stopping_object, all_chi2s, theory, timings): """ Initializes the writer for all replicas. This is decoupled from the writing of the fit in order to fix some of the variables which would be, in principle, be shared by several different history objects. Parameters ---------- `replica_numbers` indices of the replicas `pdf_objects` function to evaluate with a grid in x to generate a pdf `stopping_object` a stopping.Stopping object `all_chi2s` list of all the chi2s, in the order: tr_chi2, vl_chi2, true_chi2 `theory` theory information of the fit `timings` dictionary of the timing of the different events that happened """ self.replica_numbers = replica_numbers self.pdf_objects = pdf_objects self.stopping_object = stopping_object self.theory = theory self.timings = timings self.tr_chi2, self.vl_chi2, self.true_chi2 = all_chi2s
[docs] def write_data(self, save_path, fitname, weights_name): """ Save all the data of a fit, for all replicas. Parameters ---------- `save_path` path for the fit results, ex: `${PWD}/runcard_name/nnfit` `fitname` name of the fit, ex: `Basic_runcard` `weights_name` name of the file to save weights to, if not empty """ save_path.mkdir(exist_ok=True, parents=True) self.preprocessing = [] self.arc_lengths = [] self.integrability_numbers = [] for pdf_object in self.pdf_objects: self.preprocessing.append(pdf_object.get_preprocessing_factors()) self.arc_lengths.append(vpinterface.compute_arclength(pdf_object).tolist()) self.integrability_numbers.append( vpinterface.integrability_numbers(pdf_object).tolist() ) for i, rn in enumerate(self.replica_numbers): replica_path = save_path / f"replica_{rn}" replica_path.mkdir(exist_ok=True, parents=True) self._write_chi2s(replica_path / "chi2exps.log") self._write_metadata_json(i, replica_path / f"{fitname}.json") self._export_pdf_grid(i, replica_path / f"{fitname}.exportgrid") if weights_name: self._write_weights(i, replica_path / f"{weights_name}")
def _write_chi2s(self, out_path): # Note: same for all replicas, unless run separately chi2_log = self.stopping_object.chi2exps_json() with open(out_path, "w", encoding="utf-8") as fs: json.dump(chi2_log, fs, indent=2, cls=SuperEncoder) def _write_metadata_json(self, i, out_path): json_dict = jsonfit( best_epoch=self.stopping_object.e_best_chi2[i], positivity_status=self.stopping_object.positivity_statuses[i], preprocessing=self.preprocessing[i], arc_lengths=self.arc_lengths[i], integrability_numbers=self.integrability_numbers[i], tr_chi2=self.tr_chi2[i], vl_chi2=self.vl_chi2[i], true_chi2=self.true_chi2[i], # Note: the 2 arguments below are the same for all replicas, unless run separately timing=self.timings, stop_epoch=self.stopping_object.stop_epoch, ) with open(out_path, "w", encoding="utf-8") as fs: json.dump(json_dict, fs, indent=2, cls=SuperEncoder) fs.write('\n') log.info( "Best fit for replica #%d, chi2=%.3f (tr=%.3f, vl=%.3f)", self.replica_numbers[i], self.true_chi2[i], self.tr_chi2[i], self.vl_chi2[i], ) def _export_pdf_grid(self, i, out_path): storefit(self.pdf_objects[i], self.replica_numbers[i], out_path, self.theory) def _write_weights(self, i, out_path): log.info(" > Saving the weights for future in %s", out_path) # Extract model out of N3PDF model = self.pdf_objects[i]._models[0] model.save_weights(out_path)
[docs]class SuperEncoder(json.JSONEncoder): """Custom json encoder to get around the fact that np.float32 =/= float"""
[docs] def default(self, o): if isinstance(o, np.float32): return float(o) return super().default(o)
[docs]def jsonfit( best_epoch, positivity_status, preprocessing, arc_lengths, integrability_numbers, tr_chi2, vl_chi2, true_chi2, stop_epoch, timing, ): """Generates a dictionary containing all relevant metadata for the fit Parameters ---------- best_epoch: int epoch at which the best fit was found positivity_status: str string describing the positivity status of the fit preprocessing: dict dictionary of the preprocessing factors arc_lengths: list list of the arc lengths of the different PDFs integrability_numbers: list list of the integrability numbers of the different PDFs tr_chi2: float chi2 for the training vl_chi2: float chi2 for the validation true_chi2: float chi2 for the exp (unreplica'd data) epoch_stop: int epoch at which the stopping stopped (not the one for the best fit!) timing: dict dictionary of the timing of the different events that happened """ all_info = {} # Generate preprocessing information all_info["preprocessing"] = preprocessing # .fitinfo-like info all_info["stop_epoch"] = stop_epoch all_info["best_epoch"] = best_epoch all_info["erf_tr"] = tr_chi2 all_info["erf_vl"] = vl_chi2 all_info["chi2"] = true_chi2 all_info["pos_state"] = positivity_status all_info["arc_lengths"] = arc_lengths all_info["integrability"] = integrability_numbers all_info["timing"] = timing # Versioning info all_info["version"] = version() return all_info
[docs]def version(): """Generates a dictionary with misc version info for this run""" versions = {} try: # Wrap tf in try-except block as it could possible to run n3fit without tf import tensorflow as tf from tensorflow.python.framework import test_util versions["keras"] = tf.keras.__version__ mkl = test_util.IsMklEnabled() versions["tensorflow"] = f"{tf.__version__}, mkl={mkl}" except ImportError: versions["tensorflow"] = "Not available" versions["keras"] = "Not available" except AttributeError: # Check for MKL was only recently introduced and is not part of the official API versions["tensorflow"] = f"{tf.__version__}, mkl=??" except: # We don't want _any_ uncaught exception to crash the whole program at this point pass versions["numpy"] = np.__version__ versions["nnpdf"] = n3fit.__version__ try: versions["validphys"] = validphys.__version__ except AttributeError: versions["validphys"] = "unknown" return versions
[docs]def evln2lha(evln, nf=6): # evln Basis # {"PHT","SNG","GLU","VAL","V03","V08","V15","V24","V35","T03","T08","T15","T24","T35"}; # lha Basis: # {"TBAR","BBAR","CBAR","SBAR","UBAR","DBAR","GLUON","D","U","S","C","B","T","PHT"} lha = np.zeros(evln.shape) lha[13] = evln[0] lha[6] = evln[2] lha[8] = ( 10 * evln[1] + 30 * evln[9] + 10 * evln[10] + 5 * evln[11] + 3 * evln[12] + 2 * evln[13] + 10 * evln[3] + 30 * evln[4] + 10 * evln[5] + 5 * evln[6] + 3 * evln[7] + 2 * evln[8] ) / 120 lha[4] = ( 10 * evln[1] + 30 * evln[9] + 10 * evln[10] + 5 * evln[11] + 3 * evln[12] + 2 * evln[13] - 10 * evln[3] - 30 * evln[4] - 10 * evln[5] - 5 * evln[6] - 3 * evln[7] - 2 * evln[8] ) / 120 lha[7] = ( 10 * evln[1] - 30 * evln[9] + 10 * evln[10] + 5 * evln[11] + 3 * evln[12] + 2 * evln[13] + 10 * evln[3] - 30 * evln[4] + 10 * evln[5] + 5 * evln[6] + 3 * evln[7] + 2 * evln[8] ) / 120 lha[5] = ( 10 * evln[1] - 30 * evln[9] + 10 * evln[10] + 5 * evln[11] + 3 * evln[12] + 2 * evln[13] - 10 * evln[3] + 30 * evln[4] - 10 * evln[5] - 5 * evln[6] - 3 * evln[7] - 2 * evln[8] ) / 120 lha[9] = ( 10 * evln[1] - 20 * evln[10] + 5 * evln[11] + 3 * evln[12] + 2 * evln[13] + 10 * evln[3] - 20 * evln[5] + 5 * evln[6] + 3 * evln[7] + 2 * evln[8] ) / 120 lha[3] = ( 10 * evln[1] - 20 * evln[10] + 5 * evln[11] + 3 * evln[12] + 2 * evln[13] - 10 * evln[3] + 20 * evln[5] - 5 * evln[6] - 3 * evln[7] - 2 * evln[8] ) / 120 # if a heavy quark is not active at Q0 (the scale at which the output of the fit is stored), # keep the PDF values at 0.0 to prevent small negative values due to numerical instabilities # charm if nf > 3: lha[10] = ( 10 * evln[1] - 15 * evln[11] + 3 * evln[12] + 2 * evln[13] + 10 * evln[3] - 15 * evln[6] + 3 * evln[7] + 2 * evln[8] ) / 120 lha[2] = ( 10 * evln[1] - 15 * evln[11] + 3 * evln[12] + 2 * evln[13] - 10 * evln[3] + 15 * evln[6] - 3 * evln[7] - 2 * evln[8] ) / 120 # bottom if nf > 4: lha[11] = (5 * evln[1] - 6 * evln[12] + evln[13] + 5 * evln[3] - 6 * evln[7] + evln[8]) / 60 lha[1] = (5 * evln[1] - 6 * evln[12] + evln[13] - 5 * evln[3] + 6 * evln[7] - evln[8]) / 60 # top if nf > 5: lha[12] = (evln[1] - evln[13] + evln[3] - evln[8]) / 12 lha[0] = (evln[1] - evln[13] - evln[3] + evln[8]) / 12 return lha
[docs]def storefit(pdf_object, replica, out_path, theory): """ One-trick function which generates all output in the NNPDF format so that all other scripts can still be used. Parameters ---------- `pdf_object` N3PDF object constructed from the pdf_model that receives as input a point in x and returns an array of 14 flavours `replica` the replica index `out_path` the path where to store the output `theory` theory information of the fit """ # build exportgrid xgrid = XGRID.reshape(-1, 1) q0 = theory.get_description().get("Q0") q20 = q0**2 # Set the active flavours, by default u, d, s active_flavours = 3 # Now check whether c, b, t are active at Q0 for quark in ["c", "b", "t"]: mass = theory.get_description().get(f"m{quark}") threshold = theory.get_description().get(f"k{quark}Thr") if q0 < mass * threshold: break active_flavours += 1 # Double check IC if theory.get_description().get("IC") == 1: active_flavours = np.maximum(4, active_flavours) result = pdf_object(xgrid, flavours="n3fit").squeeze() lha = evln2lha(result.T, nf=active_flavours).T data = { "replica": replica, "q20": q20, "xgrid": xgrid.T.tolist()[0], "labels": [ "TBAR", "BBAR", "CBAR", "SBAR", "UBAR", "DBAR", "GLUON", "D", "U", "S", "C", "B", "T", "PHT", ], "pdfgrid": lha.tolist(), } with open(out_path, "w") as fs: yaml.dump(data, fs)