Source code for validphys.mc_gen

# -*- coding: utf-8 -*-
"""
mc_gen.py

Tools to check the pseudo-data MC generation.
"""
# The functions in this module have been ported to not use libNNPDF
# but they should not be used as an example as they follow the libNNPDF logic
import logging

from matplotlib.figure import Figure
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from scipy.stats import moment as mom

from reportengine.figure import figure
from reportengine.table import table
from validphys import plotutils

log = logging.getLogger(__name__)


[docs]def art_rep_generation(groups_data, make_replicas): """Generates the nreplica pseudodata replicas""" real_data_list = [] for group in groups_data: # Load all the commondata real_group = group.load_commondata() real_data = np.concatenate([i.get_cv() for i in real_group]) real_data_list.append(real_data) real_data = np.concatenate(real_data_list) art_replicas = np.stack(make_replicas) art_data = np.mean(art_replicas, axis=0) normart_replicas = art_replicas / real_data return real_data, art_replicas, normart_replicas, art_data
[docs]@figure def art_data_residuals(art_rep_generation, color="green"): """ Plot the residuals distribution of pseudodata compared to experiment. """ real_data, _, _, art_data = art_rep_generation residuals = real_data - art_data normresiduals = residuals / real_data fig, ax = plotutils.subplots() ax.hist(normresiduals, bins=50, histtype="step", stacked=True, fill=False, color=color) ax.set_ylabel(r"Data points") ax.set_xlabel(r"$(D^0-<D^{(r)}>)/D^0$") ax.set_title(r"Residuals distribution") return fig
[docs]@figure def art_data_distribution(art_rep_generation, title="Artificial Data Distribution", color="green"): """ Plot of the distribution of pseudodata. """ real_data, _, _, art_data = art_rep_generation normart_data = art_data / real_data fig, ax = plotutils.subplots() ax.hist(normart_data, bins=50, histtype="step", stacked=True, fill=False, color=color) ax.set_ylabel(r"Data points") ax.set_xlabel(r"$<D^{(r)}>/D^0$") ax.set_title(title) return fig
[docs]@figure def art_data_moments(art_rep_generation, color="green"): """ Returns the moments of the distributions per data point, as a histogram. """ _, _, normart_replicas, _ = art_rep_generation artrep_array = np.asarray(normart_replicas) fig = Figure(figsize=(10, 12)) axes = [fig.add_subplot(3, 1, i + 1) for i in range(3)] # Plot histogram of moments for momno, ax in zip(range(1, 4), axes.flatten()): # Calculate moments moms = [] for _, datapoint in zip(range(len(artrep_array.T)), artrep_array.T): moment = mom(datapoint, moment=momno) moms.append(moment) ax.hist(moms, bins=50, histtype="step", stacked=True, fill=False, color=color) ax.set_ylabel("Data points") ax.set_xlabel(f"Moment {momno}") return fig
[docs]@figure def art_data_comparison(art_rep_generation, nreplica: int): """ Plots per datapoint of the distribution of replica values. """ real_data, _, normart_replicas, art_data = art_rep_generation artrep_array = np.asarray(normart_replicas) normart_data = art_data / real_data nrows = len(artrep_array.T) fig = Figure(figsize=(4, 2 * len(artrep_array.T))) axes = [fig.add_subplot(nrows, 1, i + 1) for i in range(nrows)] for i, ax, datapoint, normartdatapoint in zip( range(len(artrep_array.T)), axes, artrep_array.T, normart_data ): ax.hist(datapoint, bins=10, histtype="step", stacked=True, fill=False) extraString = f"Datapoint number = {i}" handles, _ = ax.get_legend_handles_labels() handles.append(mpatches.Patch(color="none", label=extraString)) ax.set_xlim(-0.5, 2.5) ax.set_ylim(0, 0.5 * nreplica) ax.vlines(1, ax.get_ylim()[0], ax.get_ylim()[1]) ax.vlines( normartdatapoint, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="darkorchid" ) ax.vlines(0, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="dodgerblue") ax.vlines(2, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="dodgerblue") ax.legend(handles=handles) ax.set_xlabel(r"$D^{(r)}/D^0$") ax.set_ylabel("Frequency") return fig
[docs]@figure def one_art_data_residuals(groups_data, indexed_make_replicas): """ Residuals plot for the first datapoint. """ one_data_index = 0 all_replicas = pd.concat(indexed_make_replicas, axis=1) group_level = all_replicas.index.get_level_values("group") group_level = indexed_make_replicas[0].index.get_level_values("group") all_normresidual = [] for group in groups_data: real_group = group.load_commondata() real_data = np.concatenate([i.get_cv() for i in real_group]) one_art_data = all_replicas[group_level == group.name].iloc[one_data_index] residual = one_art_data - real_data[one_data_index] all_normresidual.append(residual / real_data[one_data_index]) fig, ax = plotutils.subplots() ax.hist(all_normresidual, bins=50, histtype="step", stacked=True, fill=False) ax.set_ylabel(r"replicas") ax.set_xlabel(r"$(D^{(r)}_{0} - D^0_{0})/D^0_{0}$") ax.set_title(r"Residual for Data Point 0") return fig
[docs]@table def art_data_mean_table(art_rep_generation, groups_data): """Generate table for artdata mean values""" real_data, _, _, art_data = art_rep_generation data = [] for group in groups_data: for dataset in group.datasets: Ndata = dataset.load_commondata().ndata for i in range(Ndata): line = [dataset.name, art_data[i], real_data[i], abs(art_data[i] - real_data[i])] data.append(line) df = pd.DataFrame(data, columns=["DataSet", "ArtData", "ExpData", "abs(residual)"]) return df