Source code for validphys.paramfits.plots

"""
plots.py

Plots for the paramfits package.
"""
import logging
import numbers

import numpy as np
import pandas as pd
from scipy import stats

from reportengine.floatformatting import format_number
from reportengine.checks import make_argcheck, CheckError, check_positive
from reportengine.figure import figure, figuregen

from validphys import plotutils
from validphys.plotutils import barplot, kde_plot, marker_iter_plot, plot_horizontal_errorbars
from validphys.paramfits.dataops import check_dataset_items, get_parabola

log = logging.getLogger(__name__)

@figure
def plot_fits_as_profile(fits_as, fits_total_chi2, suptitle=None):
    """Plot the total central chi² as a function of the value of α_s.
    Note that this plots as a function of the key "AlphaS_MZ" in the LHAPDF
    file, which is annoyingly *not* α_s(MZ) for Nf<5."""
    fig, ax = plotutils.subplots()
    alphas = fits_as
    #Could be a transposed data frame
    fits_total_chi2 = np.ravel(fits_total_chi2)
    ax.plot(alphas, fits_total_chi2)
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_ylabel(r'$\chi^2$')
    if suptitle:
        fig.suptitle(suptitle)
    return fig
@figure
def plot_as_central_parabola(
        fits_as,
        as_central_parabola,
        suptitle,
        ndata,
        parabolic_as_determination_for_total,
        markmin:bool=False):
    """Plot a parabola with the central chi² per number of points, marking
    the chi² at the total best fit."""
    fig, ax = plotutils.subplots()
    asarr = np.linspace(min(fits_as), max(fits_as), 100)
    as_central_parabola = np.asarray(as_central_parabola)
    ndata = int(ndata)
    ax.plot(asarr, np.polyval(as_central_parabola, asarr)/ndata)

    best_as = parabolic_as_determination_for_total[0].location
    chi2_at_best = np.polyval(as_central_parabola, best_as)/ndata
    ax.scatter(best_as, chi2_at_best)
    ax.annotate(format_number(chi2_at_best, 3), (best_as, chi2_at_best))
    if markmin:
        ax.axvline(-as_central_parabola[1]/2/as_central_parabola[0])
    ax.set_ylabel(r'$\chi^2/N_{data}$')
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_title(f"{suptitle}")
    return fig
@figure
def plot_as_cummulative_central_chi2(fits_as, as_datasets_central_parabolas,
                                     central_by_dataset_suptitle):
    """Plot the cumulative total chi² for each of the datasets"""
    fig, ax = plotutils.subplots()
    nx = 100
    asarr = np.linspace(min(fits_as), max(fits_as), nx)
    last = np.zeros(nx)
    for (p, label) in zip(as_datasets_central_parabolas, central_by_dataset_suptitle):
        val = last + np.polyval(p, asarr)
        ax.fill_between(asarr, last, val, label=label)
        last = val
    ax.legend()
    ax.set_ylabel(r'$\chi^2$')
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_ylim(0)
    ax.set_xlim(asarr[[0, -1]])
    return fig
@figure
def plot_as_cummulative_central_chi2_diff(fits_as, as_datasets_central_parabolas,
                                          central_by_dataset_suptitle,
                                          parabolic_as_determination_for_total,
                                          ymax:(numbers.Real, type(None))=None):
    """Plot the cumulative difference between the χ² at αs and the χ² at the
    best global αs fit. Negative differences are set to zero."""
    fig, ax = plotutils.subplots()
    nx = 100
    best_as = parabolic_as_determination_for_total[0].location
    asarr = np.linspace(min(fits_as), max(fits_as), nx)
    last = np.zeros(nx)
    for (p, label) in zip(as_datasets_central_parabolas, central_by_dataset_suptitle):
        delta = np.polyval(p, asarr) - np.polyval(p, best_as)
        delta[delta < 0] = 0
        val = last + delta
        ax.fill_between(asarr, last, val, label=label)
        last = val
    ax.legend()
    ax.set_ylabel(r'$\chi^2 - \chi^2_{{\rm best}\ \alpha_S}$')
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_ylim(0)
    if ymax is not None:
        ax.set_ylim(ymax=ymax)
    ax.set_xlim(asarr[[0, -1]])
    return fig
@figure
def plot_as_cummulative_central_chi2_diff_underflow(
        fits_as,
        as_datasets_central_parabolas,
        central_by_dataset_suptitle,
        parabolic_as_determination_for_total,
        ymax:(numbers.Real, type(None))=None):
    """Plot the cumulative difference between the χ² at αs and the χ² at the
    best global αs fit, stacking positive differences above zero and negative
    differences below."""
    fig, ax = plotutils.subplots()
    nx = 100
    best_as = parabolic_as_determination_for_total[0].location
    asarr = np.linspace(min(fits_as), max(fits_as), nx)

    #ordering = [np.polyval(p, best_as + 0.001) - np.polyval(p, best_as) for p in as_datasets_central_parabolas]
    #ordered_indexes = sorted(range(len(ordering)), key=ordering.__getitem__)
    #as_datasets_central_parabolas = [as_datasets_central_parabolas[i] for i in ordered_indexes]
    #central_by_dataset_suptitle = [central_by_dataset_suptitle[i] for i in ordered_indexes]

    last = np.zeros(nx)
    for (p, label) in zip(as_datasets_central_parabolas, central_by_dataset_suptitle):
        delta = np.polyval(p, asarr) - np.polyval(p, best_as)
        delta[delta < 0] = 0
        val = last + delta
        ax.fill_between(asarr, last, val, label=label)
        last = val

    last = np.zeros(nx)
    for (p, label) in zip(as_datasets_central_parabolas, central_by_dataset_suptitle):
        delta = np.polyval(p, asarr) - np.polyval(p, best_as)
        delta[delta > 0] = 0
        val = last + delta
        ax.fill_between(asarr, last, val)
        last = val

    ax.legend()
    ax.set_ylabel(r'$\chi^2 - \chi^2_{{\rm best}\ \alpha_S}$')
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_xlim(asarr[[0, -1]])
    if ymax is not None:
        ax.set_ylim(ymax=ymax)
    ymin = np.min(last)
    ax.set_ylim(ymin=ymin)
    return fig
@figure
def plot_as_cummulative_central_chi2_diff_negative(
        fits_as,
        as_datasets_central_parabolas,
        central_by_dataset_suptitle,
        parabolic_as_determination_for_total):
    """Plot the cumulative difference between the χ² at αs and the χ² at the
    best global αs fit, keeping only the negative differences (positive
    differences are set to zero)."""
    fig, ax = plotutils.subplots()
    nx = 100
    best_as = parabolic_as_determination_for_total[0].location
    asarr = np.linspace(min(fits_as), max(fits_as), nx)
    last = np.zeros(nx)
    for (p, label) in zip(as_datasets_central_parabolas, central_by_dataset_suptitle):
        delta = np.polyval(p, asarr) - np.polyval(p, best_as)
        delta[delta > 0] = 0
        val = last + delta
        ax.fill_between(asarr, last, val, label=label)
        last = val
    #It seems fill_between is not taken into account for the legend location
    ax.legend(loc='lower left')
    ax.set_ylabel(r'$\chi^2 - \chi^2_{{\rm best}\ \alpha_S}$')
    ax.set_xlabel(r'$\alpha_S$')
    ymin = np.min(last)
    ax.set_ylim(ymin=ymin)
    ax.set_xlim(asarr[[0, -1]])
    return fig
@figuregen
def plot_dataspecs_central_parabolas(
        dataspecs_as_central_parabolas_map,
        dataspecs_fits_as):
    """Plot the parabolas resulting from the chi² of the mean PDF to the data,
    as a function of alpha_S. Yield one plot per ``dataset_item`` comparing
    several dataspecs."""
    #Note that we cannot cast as a matrix if the shapes are different
    limits = min(map(min, dataspecs_fits_as)), max(map(max, dataspecs_fits_as))
    alphasaxis = np.linspace(*limits, 100)
    for dsname, dataspecs_parabolas in dataspecs_as_central_parabolas_map.items():
        fig, ax = plotutils.subplots()
        for label, parabola in dataspecs_parabolas.items():
            ax.plot(alphasaxis, np.polyval(parabola, alphasaxis), label=label)
        ax.set_ylabel(r'$\chi^2/N_{dat}$')
        ax.set_xlabel(r'$\alpha_S$')
        #ax.set_ylim(0)
        ax.set_title(dsname)
        ax.set_xlim(alphasaxis[[0, -1]])
        ax.legend()
        yield fig
@figure
def plot_as_datasets_pseudoreplicas_chi2(as_datasets_pseudoreplicas_chi2):
    """Plot the error bars of the αs determination from pseudoreplicas by
    dataset item. Note that this only has meaning of preferred value for
    "Total", and the rest of the values are the minima of the partial χ²."""
    data, names = zip(*as_datasets_pseudoreplicas_chi2)
    cv, err = zip(*[(np.mean(dt), np.std(dt)) for dt in data])
    fig, ax = plot_horizontal_errorbars([cv], [err], names)
    ax.set_xlabel(r"$\alpha_S$")
    ax.set_title(r"$\alpha_S$ from pseudoreplicas")
    return fig
@figure
def plot_as_exepriments_central_chi2(as_datasets_central_chi2):
    """Plot the error bars of the αs determination from the central χ² by
    dataset item. Note that this only has meaning of preferred value for
    "Total", and the rest of the values are the minima of the partial χ²."""
    data, names = zip(*as_datasets_central_chi2)
    cv, err = zip(*data)
    fig, ax = plot_horizontal_errorbars([cv], [err], names)
    ax.set_xlabel(r"$\alpha_S$")
    ax.set_title(r"$\alpha_S$ from central chi²")
    return fig
@figure
def plot_as_datasets_compare(as_datasets_pseudoreplicas_chi2,
                             as_datasets_central_chi2,
                             marktotal:bool=True):
    """Plot the result of ``plot_as_datasets_pseudoreplicas_chi2`` and
    ``plot_as_exepriments_central_chi2`` together."""
    datapseudo, namespseudo = zip(*as_datasets_pseudoreplicas_chi2)
    cvpseudo, errpseudo = zip(*[(np.mean(dt), np.std(dt)) for dt in datapseudo])

    datacentral, namescentral = zip(*as_datasets_central_chi2)
    cvcentral, errcentral = zip(*datacentral)

    if namespseudo != namescentral:
        raise RuntimeError("Names do not coincide")

    fig, ax = plot_horizontal_errorbars(
        [cvcentral, cvpseudo], [errcentral, errpseudo], namescentral,
        [r'Central $\chi^2$', r'Pseudoreplica $\chi^2$']
    )
    if marktotal:
        try:
            pos = namespseudo.index('Total')
        except ValueError:
            log.error("Asked to mark total, but it was not provided.")
        else:
            ax.axvline(cvcentral[pos], color='C0', linewidth=0.5, linestyle='--')
            ax.axvline(cvpseudo[pos], color='C1', linewidth=0.5, linestyle='--')
    ax.set_xlabel(r"$\alpha_S$")
    ax.set_title(r"$\alpha_S$ determination")
    ax.legend()
    return fig
@figure
def plot_dataspecs_as_value_error(dataspecs_as_value_error_table_impl,
                                  dataspecs_fits_as,
                                  marktotal:bool=True,
                                  fix_limits:bool=True):
    """
    Plot the result for each dataspec of the pseudoreplica alpha_s
    determination based on the partial chi² for each ``dataset_item``.

    If ``marktotal`` is True, a vertical line will appear marking the
    position of the best fit.

    If ``fix_limits`` is True, the limits of the plot will span all the
    fitted values. Otherwise a heuristic will be used.
    """
    df = dataspecs_as_value_error_table_impl
    datalabels = df.columns.levels[0]
    catlabels = list(df.index)
    cvs = df.loc[:, (slice(None), 'mean')].T.values
    errors = df.loc[:, (slice(None), 'error')].T.values

    if fix_limits:
        minlim = min(min(x) for x in dataspecs_fits_as)
        maxlim = max(max(x) for x in dataspecs_fits_as)
        lims = minlim, maxlim
    else:
        lims = None

    fig, ax = plot_horizontal_errorbars(
        cvs, errors, catlabels,
        datalabels,
        xlim=lims
    )
    if marktotal:
        try:
            pos = catlabels.index('Total')
        except ValueError:
            log.error("Asked to mark total, but it was not provided.")
        else:
            for i, cv in enumerate(cvs):
                ax.axvline(cv[pos], color=f'C{i%10}', linewidth=0.5, linestyle='--')

    ax.set_xlabel(r"$\alpha_S$")
    ax.set_title(r"$\alpha_S$ determination")
    ax.legend()
    return fig
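# Illustrative note (not part of the original module): the value/error tables used
# above and below are assumed to carry a two-level column MultiIndex, with the
# dataspec label on level 0 and ('mean', 'error') on level 1, and the dataset items
# (including 'Total') on the index. A minimal sketch of the selection pattern, with
# made-up numbers:
#
#     cols = pd.MultiIndex.from_product([['NLO', 'NNLO'], ['mean', 'error']])
#     demo = pd.DataFrame([[0.118, 0.002, 0.119, 0.001]],
#                         index=['Total'], columns=cols)
#     demo.loc[:, (slice(None), 'mean')]                # the 'mean' column of every dataspec
#     demo.loc['Total', (slice(None), 'error')].values  # -> array([0.002, 0.001])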
@figure
def plot_dataspecs_as_value_error_comparing_with_central(
        dataspecs_as_value_error_table_impl,
        as_datasets_central_chi2,
        dataspecs_fits_as,
        speclabel,
        marktotal:bool=True,
        fix_limits:bool=True):
    """
    This is an aberration we need to do for the paper plots. It compares
    the central (old) and new partial chi².
    """
    df = dataspecs_as_value_error_table_impl
    catlabels = list(df.index)
    replica_cvs = df.loc[:, (speclabel, 'mean')].T.values
    replica_errors = df.loc[:, (speclabel, 'error')].T.values

    datacentral, namescentral = zip(*as_datasets_central_chi2)
    cvcentral, errcentral = zip(*datacentral)

    if fix_limits:
        minlim = min(min(x) for x in dataspecs_fits_as)
        maxlim = max(max(x) for x in dataspecs_fits_as)
        lims = minlim, maxlim
    else:
        lims = None

    cvs = np.c_[cvcentral, replica_cvs].T
    errors = np.c_[errcentral, replica_errors].T

    fig, ax = plot_horizontal_errorbars(
        cvs, errors, catlabels,
        [r'$\Delta \chi^2=1$', 'Replicas'],
        xlim=lims
    )
    if marktotal:
        try:
            pos = catlabels.index('Total')
        except ValueError:
            log.error("Asked to mark total, but it was not provided.")
        else:
            for i, cv in enumerate(cvs):
                ax.axvline(cv[pos], color=f'C{i%10}', linewidth=0.5, linestyle='--')

    ax.set_xlabel(r"$\alpha_S$")
    ax.set_title(r"$\alpha_S$ determination")
    ax.legend()
    return fig
@make_argcheck
def _check_first_is_total(fits_central_chi2_by_experiment_and_dataset):
    l = fits_central_chi2_by_experiment_and_dataset
    if not l or l[0]['experiment_label'] != 'Total':
        raise CheckError("Expecting that the first experiment is the total. "
                         "You may need to set prepend_total=True")
@figure
@_check_first_is_total
def plot_as_value_error_central(as_datasets_central_chi2, marktotal:bool=True):
    """Plot the αs determination from the central χ² by dataset item, with a
    vertical line marking the total."""
    datacentral, namescentral = zip(*as_datasets_central_chi2)
    cvcentral, errcentral = zip(*datacentral)

    fig, ax = plot_horizontal_errorbars(
        [cvcentral], [errcentral], namescentral,
        [r'Central']
    )
    ax.axvline(cvcentral[0], color=f'C{0}', linewidth=0.5, linestyle='--')
    ax.set_xlabel(r"$\alpha_S$")
    ax.set_xlim(0.110, 0.125)
    ax.set_title(r"$\alpha_S$ determination")
    ax.legend()
    return fig
# Pull plots
def _pulls_func(cv, alphas_global, error, error_global):
    """Small definition to compute pulls"""
    return error_global*((cv - alphas_global)/(error**2))
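# Illustrative note (not part of the original module): the pull defined above is
#
#     pull_i = sigma_global * (alpha_i - alpha_global) / sigma_i**2
#
# i.e. the deviation of a partial determination from the global one, weighted by
# the inverse variance of the partial fit and rescaled by the global uncertainty.
# A minimal numeric sketch with made-up values:
#
#     _pulls_func(0.120, 0.118, 0.002, 0.001)
#     # = 0.001 * (0.120 - 0.118) / 0.002**2 = 0.5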
@figure
@_check_first_is_total
def plot_pulls_central(as_datasets_central_chi2, hide_total:bool=True):
    """Plots the pulls per experiment for the central results"""
    data, names = zip(*as_datasets_central_chi2)
    cv, err = zip(*data)
    pulls = list()
    if hide_total:
        for i in range(1, len(cv)):
            pulls.append(_pulls_func(cv[i], cv[0], err[i], err[0]))
        names = [x for x in names if x != 'Total']
    else:
        for i in range(0, len(cv)):
            pulls.append(_pulls_func(cv[i], cv[0], err[i], err[0]))

    fig, ax = barplot(pulls, names, " ", orientation="horizontal")
    ax.legend()
    return fig
@figure
@_check_first_is_total
def plot_pull_gaussian_fit_central(as_datasets_central_chi2,
                                   dataspecs_fits_as, dataspecs_speclabel,
                                   hide_total:bool=True):
    """Bins the pulls and overlays the normalised gaussian fit and KDE on the
    histogram of pulls"""
    data, names = zip(*as_datasets_central_chi2)
    cv, err = zip(*data)
    pulls = list()
    if hide_total:
        for i in range(1, len(cv)):
            pulls.append(_pulls_func(cv[i], cv[0], err[i], err[0]))
        names = [x for x in names if x != 'Total']
    else:
        for i in range(0, len(cv)):
            pulls.append(_pulls_func(cv[i], cv[0], err[i], err[0]))

    mean_pulls = np.mean(pulls)
    std_dev = np.std(pulls)
    x = np.linspace(min(pulls), max(pulls), 100)
    kde_pulls = stats.gaussian_kde(pulls, bw_method='silverman')
    fig, ax = plotutils.subplots()
    #ax.set_title(f"Histogram of pulls for {label} dataset")
    ax.set_xlabel(r"Pull")
    ax.plot(x, kde_pulls(x), label="Kernel Density Estimation of pulls")
    ax.hist(pulls, density=True, bins=4)  # normalised histogram (matplotlib's old normed kwarg was removed)
    ax.grid(False)
    normdist = stats.norm(mean_pulls, std_dev)
    ax.plot(x, normdist.pdf(x), label="Normalised gaussian fit")
    ax.legend()
    return fig
@figure
def plot_pull_plots_global_min(dataspecs_as_value_error_table_impl,
                               dataspecs_fits_as, dataspecs_speclabel,
                               hide_total:bool=True):
    """Plots the pulls of individual experiments as a barplot."""
    df = dataspecs_as_value_error_table_impl
    tots_error = df.loc['Total', (slice(None), 'error')].values
    tots_mean = df.loc['Total', (slice(None), 'mean')].values

    if hide_total:
        df = df.loc[df.index != 'Total']

    catlabels = list(df.index)
    cvs = df.loc[:, (slice(None), 'mean')].values
    errors = df.loc[:, (slice(None), 'error')].values

    pulls = _pulls_func(cvs, tots_mean, errors, tots_error).T
    pulls = np.asarray(pulls, dtype=float)

    #Compute sums
    #sp = np.atleast_2d(np.nansum(pulls, axis=1)).T
    #pulls = np.concatenate([pulls, sp], axis=1)
    #fig, ax = barplot(pulls, [*catlabels, 'sum'], dataspecs_speclabel, orientation="horizontal")

    fig, ax = barplot(pulls, catlabels, dataspecs_speclabel, orientation="horizontal")
    #ax.set_title(r"Pulls per experiment")
    ax.legend()
    return fig
@make_argcheck
def _check_two_speclabels(dataspecs_speclabel):
    if len(dataspecs_speclabel) != 2:
        raise CheckError("Need 2 data specs")
@figure
@_check_two_speclabels
def alphas_shift(
        dataspecs_as_value_error_table_impl,
        dataspecs_quad_table_impl,
        dataspecs_ndata_table,
        dataspecs_dataset_ndata,
        dataspecs_fits_as,
        dataspecs_speclabel,
        hide_total:bool=True,
        ndata_weight:bool=False):
    """Plots NNLO - NLO alphas values for each experiment, i.e. the shift in
    the best fit alphas for each process (as it currently stands...) with
    respect to the global best fit alphas at NLO or NNLO. Also contains some
    computations for estimating MHOU, using either the number of data points
    per experiment/process (ndata) or the quadratic coefficient of the
    parabolic fit (quad_weights)"""
    df1 = dataspecs_ndata_table
    df = dataspecs_as_value_error_table_impl
    df2 = dataspecs_quad_table_impl
    tots_mean = df.loc['Total', (slice(None), 'mean')].values

    if hide_total:
        df = df.loc[df.index != 'Total']
        df1 = df1.loc[df1.index != 'Total']
        df2 = df2.loc[df2.index != 'Total']

    cvs = df.loc[:, (slice(None), 'mean')].T.values
    quad_weights = df2.loc[:, (slice(None), 'mean')].T.values
    catlabels = list(df.index)

    alphas_shift = []
    nnlo_alphas_global_shift = []
    nlo_alphas_global_shift = []
    nnlo2_alphas_global_shift = []
    for i in range(0, len(cvs[0])):
        alphas_shift.append(cvs[1][i] - cvs[0][i])
        nnlo2_alphas_global_shift.append((cvs[1][i] - tots_mean[1])**2)
        nlo_alphas_global_shift.append((cvs[0][i] - tots_mean[0])**2)
        nnlo_alphas_global_shift.append(cvs[1][i] - tots_mean[1])

    weights_nlo = []
    weights_nnlo = []
    weights_nnlo_sq = []
    weights_nlo_sq = []
    if ndata_weight:
        ndataptsnlo = df1.iloc[:, 0]
        ndataptsnnlo = df1.iloc[:, 1]
        for i in range(0, len(ndataptsnlo)):
            weights_nlo.append(ndataptsnlo[i])
            weights_nnlo.append(ndataptsnnlo[i])
    else:
        for i in range(0, len(quad_weights.T)):
            weights_nlo.append(quad_weights[0][i])
            weights_nnlo.append(quad_weights[1][i])
            weights_nnlo_sq.append((quad_weights[1][i])**2)
            weights_nlo_sq.append((quad_weights[0][i])**2)

    term1, term2 = dataspecs_speclabel

    fig, ax = barplot(alphas_shift, catlabels, " ", orientation="horizontal")
    ax.set_title(f"{term2} - {term1} shifts")
    ax.legend()
    return fig
@figuregen
def plot_pull_gaussian_fit_pseudo(dataspecs_as_value_error_table_impl,
                                  dataspecs_fits_as, dataspecs_speclabel,
                                  hide_total:bool=True):
    """Bins the pulls computed in pull_plots_global_min and overlays the
    normalised gaussian fit and KDE on the histogram of pulls"""
    df = dataspecs_as_value_error_table_impl
    tots_error = df.loc['Total', (slice(None), 'error')].T.values
    tots_mean = df.loc['Total', (slice(None), 'mean')].T.values

    if hide_total:
        df = df.loc[df.index != 'Total']

    cvs = df.loc[:, (slice(None), 'mean')].T.values
    errors = df.loc[:, (slice(None), 'error')].T.values

    for label, i in zip(dataspecs_speclabel, range(len(cvs))):
        pulls = _pulls_func(cvs[i], tots_mean[i], errors[i], tots_error[i])
        mean_pulls = np.mean(pulls)
        std_dev = np.std(pulls)
        x = np.linspace(min(pulls), max(pulls), 100)
        kde_pulls = stats.gaussian_kde(pulls, bw_method='silverman')
        fig, ax = plotutils.subplots()
        #ax.set_title(f"Histogram of pulls for {label} dataset")
        ax.set_xlabel(r"Pull")
        ax.plot(x, kde_pulls(x), label="Kernel Density Estimation of pulls")
        ax.hist(pulls, density=True, bins=4)  # normalised histogram (matplotlib's old normed kwarg was removed)
        ax.grid(False)
        normdist = stats.norm(mean_pulls, std_dev)
        ax.plot(x, normdist.pdf(x), label="Normalised gaussian fit")
        ax.legend()
        yield fig
@figure
def plot_fitted_replicas_as_profiles_matched(fits_as,
                                             fits_replica_data_with_discarded_replicas,
                                             parabolic_as_determination,
                                             suptitle=None):
    """Plot chi²(as) keeping the replica nnfit index matched.

    The ``max_ndiscarded`` parameter defines the number of points discarded
    by postfit from which we discard the curve.
    """
    alphas = fits_as

    minimums = parabolic_as_determination

    table = fits_replica_data_with_discarded_replicas.values

    fig, ax = plotutils.subplots()

    from matplotlib.collections import LineCollection

    lc = LineCollection(
        [list(x for x in zip(alphas, t) if np.isfinite(x[1])) for t in table])
    lc.set_array(minimums.data)
    lc.set_clim(*np.percentile(minimums.data, (5, 95)))
    ax.add_collection(lc)
    ax.set_xlim(min(alphas), max(alphas))
    ax.set_ylim(np.nanmin(table), np.nanmax(table))
    fig.colorbar(lc, label=r"Preferred $\alpha_S$")
    ax.set_title(rf"$\alpha_S$ = ${minimums.location:.4f} \pm {minimums.scale:.4f}$ N={len(minimums.data)}")
    if suptitle:
        fig.suptitle(suptitle)

    #ax.plot(alphas, np.array(table).T, color='#ddddcc')
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_ylabel(r'$\chi^2$')
    return fig
@figuregen
@check_dataset_items
def plot_dataspecs_pseudoreplica_means(
        dataspecs_chi2_by_dataset_dict,
        dataspecs_speclabel,
        dataset_items:(list, type(None)) = None,
        ):
    """Plot the mean chi² from data to pseudoreplica, over replicas in a fit
    and comparing dataspecs."""
    if dataset_items is None:
        dataset_items = list(dataspecs_chi2_by_dataset_dict)

    for it in dataset_items:
        fig, ax = plotutils.subplots()
        for label, tb in zip(dataspecs_speclabel, dataspecs_chi2_by_dataset_dict[it]):
            if tb is None:
                #Advance the color cycle so all dataspecs get the same color
                ax._get_lines.get_next_color()
                continue
            m = tb.mean(axis=0)
            ax.plot(np.asarray(m.index), np.asarray(m), label=label)
        ax.set_title(it)
        ax.set_xlabel(r'$\alpha_S$')
        ax.set_ylabel(r'$\left<\chi^2\right>$')
        ax.legend()
        yield fig
#TODO: This should take parabolic_as_determination with the as_transforms
#and so on, rather than refitting here.
@figuregen
@check_dataset_items
@check_positive('examples_per_item')
def plot_dataspecs_parabola_examples(
        dataspecs_chi2_by_dataset_dict,
        dataspecs_speclabel,
        dataset_items:(list, type(None)) = None,
        examples_per_item:int = 2,
        random_seed:int = 0,
        ):
    """Sample ``examples_per_item`` replica_indexes for each of the
    ``dataset_items``. Yield a plot with the parabolic fit, as resolved for
    each of the dataspecs. The random state is local to the function and
    controlled by ``random_seed``."""
    random_state = np.random.RandomState(random_seed)
    if dataset_items is None:
        dataset_items = list(dataspecs_chi2_by_dataset_dict)

    for it in dataset_items:
        table = pd.concat(dataspecs_chi2_by_dataset_dict[it], join='inner',
                          axis=1, keys=dataspecs_speclabel)
        if examples_per_item > len(table):
            log.warning("Length of table is less than examples_per_item")
            sampled = table
        else:
            sampled = table.sample(examples_per_item, random_state=random_state)

        for index, row in sampled.iterrows():
            fig, ax = plotutils.subplots()
            im = marker_iter_plot()
            ax.set_title(f"Parabola example for {it} (nnfit_index={index})")
            for i, (label, vals) in enumerate(row.groupby(level=0)):
                asvals = vals.index.get_level_values(1)
                color = f'C{i}'
                y = vals.values
                ax.plot(asvals, y, **next(im), label=label, color=color,
                        linestyle='none', lw=0.5)
                a, b, c = parabola = get_parabola(asvals, y)
                ax.plot(asvals, np.polyval(parabola, asvals), color=color,
                        linestyle='--', lw=0.5)
                m = -b/2/a
                if asvals[0] < m < asvals[-1]:
                    ax.axvline(m, color=color, lw=0.4)
            ax.set_xlabel(r'$\alpha_S$')
            ax.set_ylabel(r'$\chi^2$')
            ax.legend()
            yield fig
@figure
def plot_as_distribution(parabolic_as_determination, suptitle):
    """Histograms of the values of alphas produced, with the datapoints in
    an array as sticks on an axis"""

    distribution = parabolic_as_determination.data
    fig, ax = plotutils.subplots()

    kde_plot(distribution)
    ax.legend()
    ax.set_title(f"{suptitle}")
    ax.set_xlabel(r"$\alpha_S$")
    return fig
@figure
def plot_total_as_distribution_dataspecs(
        dataspecs_parabolic_as_determination_for_total,
        dataspecs_speclabel,
        ):
    """Compare the total alpha_s distributions across dataspecs.
    See ``plot_as_distribution``."""
    fig, ax = plotutils.subplots()
    for dist, label in zip(
            dataspecs_parabolic_as_determination_for_total,
            dataspecs_speclabel):
        #Remember that *_for_total is a len 1 list, so take the first element.
        kde_plot(dist[0].error_members(), ax=ax, label=label)
    ax.set_xlabel(r"$\alpha_S$")
    ax.legend()
    return fig
@figure
def plot_poly_as_fit(fits_as, fits_replica_data_correlated,
                     max_ndiscarded:int=4, polorder:int=2, suptitle=None):
    """Plot a polynomial fit of chi²(as) of degree ``polorder``, keeping the
    replica index matched.

    The ``max_ndiscarded`` parameter defines the number of points discarded
    by postfit from which we discard the curve.
    """
    alphas = fits_as
    df = fits_replica_data_correlated

    def ap(x):
        x.columns = x.columns.droplevel(0)
        return x['chi2']

    table = df.groupby(axis=1, level=0).apply(ap)
    filt = table.isnull().sum(axis=1) < max_ndiscarded
    table = table[filt]
    table = table.values

    fig, ax = plotutils.subplots()

    minimums = []
    asarr = np.asarray(alphas)
    for i, row in enumerate(table):
        filt = np.isfinite(row)
        fit = np.polyfit(asarr[filt], row[filt], polorder)
        extrema = np.roots(np.polyder(fit))

        #Cast away the zero complex part
        candidates = np.real(extrema[np.isreal(extrema)])
        #candidates = candidates[(candidates > alphas[0]) & (candidates < alphas[-1])]
        if len(candidates):
            minimums.append(candidates[np.argmin(np.polyval(fit, candidates))])
        color = f'C{i%10}'
        ax.plot(alphas, np.polyval(fit, alphas), color=color)
        ax.plot(asarr[filt], row[filt], 'o', color=color)
    minimums = np.asarray(minimums)

    ax.set_xlim(min(alphas), max(alphas))
    ax.set_ylim(np.nanmin(table), np.nanmax(table))
    ax.set_title(rf"$\alpha_S$ from order {polorder} polynomial fit "
                 rf"= ${np.mean(minimums):.4f} \pm {np.std(minimums):.4f}$ "
                 rf"N={len(minimums)}")
    ax.set_xlabel(r'$\alpha_S$')
    ax.set_ylabel(r'$\chi^2/N_{dat}$')
    if suptitle:
        fig.suptitle(suptitle)
    return fig
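# Illustrative note (not part of the original module): the per-replica minima above
# are the stationary points of the fitted polynomial, i.e. the real roots of its
# derivative, keeping the candidate with the smallest chi². A minimal sketch of that
# recipe with a toy parabola chi2 = (as - 0.118)**2:
#
#     toy_as = np.array([0.114, 0.116, 0.118, 0.120, 0.122])
#     toy_chi2 = (toy_as - 0.118)**2
#     fit = np.polyfit(toy_as, toy_chi2, 2)
#     extrema = np.roots(np.polyder(fit))
#     candidates = np.real(extrema[np.isreal(extrema)])
#     best = candidates[np.argmin(np.polyval(fit, candidates))]  # ~0.118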
#TODO: This should probably be done more granularly and not here
def _reduce_mean_parabola(df):
    if df is None:
        return None
    a, b, c = get_parabola(df.columns, df.mean(axis=0))
    if a < 0:
        raise ValueError("Expecting convex parabola")
    minimum = -b/2/a
    error = 1/np.sqrt(a)
    return minimum, error
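# Illustrative note (not part of the original module): for a convex parabola
# chi2(as) = a*as**2 + b*as + c with a > 0, the minimum sits at as0 = -b/(2a), and
# the Delta chi2 = 1 criterion, chi2(as0 + delta) = chi2(as0) + a*delta**2 = chi2(as0) + 1,
# gives the uncertainty delta = 1/sqrt(a), which is what the helper above returns.
# A minimal numeric sketch with made-up coefficients:
#
#     a, b = 400.0, -94.4    # parabola with minimum at 0.118
#     minimum = -b/2/a       # = 0.118
#     error = 1/np.sqrt(a)   # = 0.05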
@figure
def plot_mean_pulls(dataspecs_chi2_by_dataset_dict, dataspecs_speclabel):
    """Compute the pulls from the sum of the parabolas."""
    d = dataspecs_chi2_by_dataset_dict
    dtotal = d['Total']
    er_val_global = [_reduce_mean_parabola(v) for v in dtotal]

    ks = set(d.keys())
    ks.remove('Total')

    l = [0]*len(dataspecs_speclabel)
    for k in ks:
        for i in range(len(dataspecs_speclabel)):
            l[i] = l[i] + d[k][i]
    er_val_global = [_reduce_mean_parabola(it) for it in l]

    pulls = []
    for dataset_item in d:
        ditem = d[dataset_item]
        ds_pulls = []
        for tb, (alphas_global, error_global) in zip(ditem, er_val_global):
            if tb is None:
                ds_pulls.append(None)
                continue
            try:
                cv, error = _reduce_mean_parabola(tb)
            except ValueError:
                log.error(f"Concave parabola for {dataset_item}")
                ds_pulls.append(None)
            else:
                ds_pulls.append(_pulls_func(cv, alphas_global, error, error_global))
        pulls.append(ds_pulls)
    pulls = np.asarray(pulls, dtype=float)

    #Add total sum
    #sp = np.atleast_2d(np.nansum(pulls, axis=0))
    #pulls = np.concatenate([pulls, sp], axis=0)
    #fig, ax = barplot(pulls.T, [*d.keys(), 'sum'], dataspecs_speclabel)

    fig, ax = barplot(pulls.T, d.keys(), dataspecs_speclabel)
    ax.legend()
    return fig
# Define aliases for functions with spelling mistakes in their names which have
# now been corrected, so that old runcards still work.
plot_as_datasets_pseudorreplicas_chi2 = plot_as_datasets_pseudoreplicas_chi2
plot_dataspecs_pseudorreplica_means = plot_dataspecs_pseudoreplica_means