# Source code for validphys.theorycovariance.output

"""
output.py
Basic tools for plotting theory covariance matrices and their properties.
"""
from __future__ import generator_stop

import logging
from math import inf

from matplotlib import cm
from matplotlib import colors as mcolors
import numpy as np
import pandas as pd

from reportengine import collect
from reportengine.figure import figure
from validphys import plotutils
from validphys.theorycovariance.construction import theory_covmat_custom

log = logging.getLogger(__name__)


def matrix_plot_labels(df):
    """Return tick locations, tick labels and block start positions for a plot.

    The entries of ``df.index`` are expected to be tuples, e.g.
    (process, dataset, points) or (dataset, points). Labelling is by the
    outermost element of each index entry. Consecutive entries sharing the
    same outermost label form one block, and the tick of a block is placed
    at the centre of that block.

    Parameters
    ----------
    df : object with an ``index`` attribute (e.g. a pandas DataFrame or Series)
        Only ``df.index`` is read. Index entries of any depth are accepted,
        since just their first element is used.

    Returns
    -------
    ticklocs : list of float
        Midpoint between the start of each block and the start of the next.
    ticklabels : list
        Outermost label of each block, in order of appearance.
    startlocs : list of int
        Position at which each block starts, followed by the total number
        of index entries.

    Notes
    -----
    An empty index yields ``([], [], [0])``.
    """
    labels = [entry[0] for entry in df.index]
    if not labels:
        return [], [], [0]

    # A new block starts wherever the label differs from the previous one.
    ticklabels = [labels[0]]
    startlocs = [0]
    for pos in range(1, len(labels)):
        if labels[pos] != labels[pos - 1]:
            ticklabels.append(labels[pos])
            startlocs.append(pos)
    # Closing boundary, so that every block has both a start and an end.
    startlocs.append(len(labels))

    ticklocs = [0.5 * (start + end) for start, end in zip(startlocs, startlocs[1:])]
    return ticklocs, ticklabels, startlocs
def plot_covmat_heatmap(covmat, title):
    """Matrix plot of a covariance matrix.

    Rows and columns are ordered with ``_get_key``. The entries are drawn
    multiplied by 100 (the colour bar is labelled "% of data") on a
    symmetric-log colour scale whose limits are plus and minus 100 times the
    largest entry. Tick labels and the dashed separators between blocks are
    obtained from ``matrix_plot_labels``.

    Parameters
    ----------
    covmat : pandas.DataFrame
        Square dataframe; the same ordered labels are applied to rows and
        columns, so both axes are expected to carry the same index. It is
        not modified.
    title : str
        Title of the figure.

    Returns
    -------
    fig
        The figure created through ``plotutils.subplots``.
    """
    # Work on sorted copies: an in-place sort would reorder the caller's dataframe.
    df = covmat.sort_index(axis=0).sort_index(axis=1)
    newindex = sorted(df.index.tolist(), key=_get_key)
    # Apply the process/dataset ordering to rows and columns alike.
    newdf = df.reindex(index=newindex, columns=newindex)
    matrix = newdf.values
    fig, ax = plotutils.subplots(figsize=(15, 15))
    matrixplot = ax.matshow(
        100 * matrix,
        cmap=cm.Spectral_r,
        norm=mcolors.SymLogNorm(
            linthresh=0.00001, linscale=1, vmin=-100 * matrix.max(), vmax=100 * matrix.max()
        ),
    )
    cbar = fig.colorbar(matrixplot, fraction=0.046, pad=0.04)
    cbar.set_label(label="% of data", fontsize=20)
    cbar.ax.tick_params(labelsize=20)
    ax.set_title(title, fontsize=25)
    ticklocs, ticklabels, startlocs = matrix_plot_labels(newdf)
    ax.set_xticks(ticklocs)
    ax.set_xticklabels(ticklabels, rotation=30, ha="right", fontsize=20)
    ax.xaxis.tick_bottom()
    ax.set_yticks(ticklocs)
    ax.set_yticklabels(ticklabels, fontsize=20)
    # Shift startlocs elements 0.5 to left so lines are between indexes
    startlocs_lines = [x - 0.5 for x in startlocs]
    ax.vlines(startlocs_lines, -0.5, len(matrix) - 0.5, linestyles="dashed")
    ax.hlines(startlocs_lines, -0.5, len(matrix) - 0.5, linestyles="dashed")
    ax.margins(x=0, y=0)
    return fig
_procorder = ("DIS NC", "DIS CC", "TOP", "DY NC", "DY CC", "SINGLETOP", "JETS", "PHOTON", "DIJET") _dsorder = ( "BCDMSP", "BCDMSD", "SLACP", "SLACD", "NMC", "NMCPD", "HERAF2CHARM", "HERACOMBNCEP460", "HERACOMBNCEP575", "HERACOMBNCEP820", "HERACOMBNCEP920", "HERACOMBNCEM", "CHORUSNU", "CHORUSNB", "NTVNUDMN", "NTVNBDMN", "HERACOMBCCEP", "HERACOMBCCEM", "CDFZRAP", "D0ZRAP", "D0WEASY", "D0WMASY", "ATLASWZRAP36PB", "ATLASZHIGHMASS49FB", "ATLASLOMASSDY11EXT", "ATLASWZRAP11", "ATLASZPT8TEVMDIST", "ATLASZPT8TEVYDIST", "CMSWEASY840PB", "CMSWMASY47FB", "CMSWCHARMRAT", "CMSDY2D11", "CMSWMU8TEV", "CMSWCHARMTOT", "CMSZDIFF12", "LHCBZ940PB", "LHCBWZMU7TEV", "LHCBWZMU8TEV", "LHCBZEE2FB", "ATLAS1JET11", "CMSJETS11", "CDFR2KT", "ATLASTTBARTOT", "ATLASTOPDIFF8TEVTRAPNORM", "CMSTTBARTOT", "CMSTOPDIFF8TEVTTRAPNORM", ) def _get_key(element): """The key used to sort covariance matrix dataframes according to the ordering of processes and datasets specified in _procorder and _dsorder.""" x1, y1, z1 = element x2 = _procorder.index(x1) if x1 in _procorder else inf y2 = _dsorder.index(y1) if y1 in _dsorder else inf z2 = z1 newelement = (x2, y2, z2) return newelement
def plot_corrmat_heatmap(corrmat, title):
    """Matrix plot of a correlation matrix.

    Rows and columns are ordered with ``_get_key`` and the entries are drawn
    on a linear colour scale fixed to the range [-1, 1]. Tick labels and the
    dashed separators between blocks are obtained from
    ``matrix_plot_labels``.

    Parameters
    ----------
    corrmat : pandas.DataFrame
        Square dataframe; the same ordered labels are applied to rows and
        columns, so both axes are expected to carry the same index. It is
        not modified.
    title : str
        Title of the figure.

    Returns
    -------
    fig
        The figure created through ``plotutils.subplots``.
    """
    # Work on sorted copies: an in-place sort would reorder the caller's dataframe.
    df = corrmat.sort_index(axis=0).sort_index(axis=1)
    newindex = sorted(df.index.tolist(), key=_get_key)
    # Apply the process/dataset ordering to rows and columns alike.
    newdf = df.reindex(index=newindex, columns=newindex)
    matrix = newdf.values
    fig, ax = plotutils.subplots(figsize=(15, 15))
    matrixplot = ax.matshow(matrix, cmap=cm.Spectral_r, vmin=-1, vmax=1)
    cbar = fig.colorbar(matrixplot, fraction=0.046, pad=0.04)
    cbar.ax.tick_params(labelsize=20)
    ax.set_title(title, fontsize=25)
    ticklocs, ticklabels, startlocs = matrix_plot_labels(newdf)
    ax.set_xticks(ticklocs)
    ax.set_xticklabels(ticklabels, rotation=30, ha="right", fontsize=20)
    ax.xaxis.tick_bottom()
    ax.set_yticks(ticklocs)
    ax.set_yticklabels(ticklabels, fontsize=20)
    # Shift startlocs elements 0.5 to left so lines are between indexes
    startlocs_lines = [x - 0.5 for x in startlocs]
    ax.vlines(startlocs_lines, -0.5, len(matrix) - 0.5, linestyles="dashed")
    ax.hlines(startlocs_lines, -0.5, len(matrix) - 0.5, linestyles="dashed")
    ax.margins(x=0, y=0)
    return fig
@figure
def plot_normexpcovmat_heatmap(procs_normcovmat):
    """Heatmap of the experimental covariance matrix normalised to data.

    Delegates the drawing to ``plot_covmat_heatmap`` with a fixed title and
    returns the resulting figure.
    """
    return plot_covmat_heatmap(procs_normcovmat, "Experimental Covariance Matrix")
@figure
def plot_expcorrmat_heatmap(procs_corrmat):
    """Heatmap of the experimental correlation matrix.

    Delegates the drawing to ``plot_corrmat_heatmap`` with a fixed title and
    returns the resulting figure.
    """
    return plot_corrmat_heatmap(procs_corrmat, "Experimental Correlation Matrix")
@figure
def plot_normthcovmat_heatmap_custom(theory_normcovmat_custom, theoryids, fivetheories):
    """Heatmap of the block diagonal theory covariance matrix by process type.

    The title quotes the number of entries in ``theoryids``; when there are
    five of them and ``fivetheories`` equals ``"bar"``, the number is shown
    as a barred 5 instead.
    """
    n_points = len(theoryids)
    if n_points == 5 and fivetheories == "bar":
        n_points = r"$\bar{5}$"
    return plot_covmat_heatmap(
        theory_normcovmat_custom, f"Theory Covariance matrix ({n_points} pt)"
    )
@figure
def plot_thcorrmat_heatmap_custom(theory_corrmat_custom, theoryids, fivetheories):
    """Heatmap of the theory correlation matrix, correlations by process type.

    The title quotes the number of entries in ``theoryids``; when there are
    five of them and ``fivetheories`` equals ``"bar"``, the number is shown
    as a barred 5 instead.
    """
    n_points = len(theoryids)
    if n_points == 5 and fivetheories == "bar":
        n_points = r"$\bar{5}$"
    return plot_corrmat_heatmap(
        theory_corrmat_custom, f"Theory Correlation matrix ({n_points} pt)"
    )
@figure
def plot_expplusthcorrmat_heatmap_custom(
    experimentplustheory_corrmat_custom, theoryids, fivetheories
):
    """Heatmap of the experimental + theory correlation matrix.

    The title quotes the number of entries in ``theoryids``; when there are
    five of them and ``fivetheories`` equals ``"bar"``, the number is shown
    as a barred 5 instead.
    """
    n_points = len(theoryids)
    if n_points == 5 and fivetheories == "bar":
        n_points = r"$\bar{5}$"
    heading = f"Experimental + Theory Correlation Matrix ({n_points} pt)"
    return plot_corrmat_heatmap(experimentplustheory_corrmat_custom, heading)
@figure
def plot_diag_cov_comparison(
    theory_covmat_custom, procs_covmat, procs_data_values, theoryids, fivetheories
):
    """Plot sqrt(cov_ii)/|data_i| for the experimental, theory and total covmats.

    The total covariance matrix is the sum of ``theory_covmat_custom`` and
    ``procs_covmat``. The points are ordered with ``_get_key``, the x ticks
    and dashed separators come from ``matrix_plot_labels``, and the y axis is
    limited to [0, 0.5]. The title quotes the number of entries in
    ``theoryids`` (shown as a barred 5 when there are five and
    ``fivetheories`` equals ``"bar"``).
    """
    n_points = len(theoryids)
    if n_points == 5 and fivetheories == "bar":
        n_points = r"$\bar{5}$"

    abs_data = np.abs(procs_data_values)

    def relative_uncertainty(covariance):
        # Square root of the diagonal, relative to the absolute data values.
        return np.sqrt(np.diag(covariance)) / abs_data

    theory_unc = relative_uncertainty(theory_covmat_custom)
    # sort by theory covmat processes
    ordered_index = sorted(theory_unc.index, key=_get_key)
    theory_unc = theory_unc.reindex(ordered_index)
    exp_unc = relative_uncertainty(procs_covmat).reindex(ordered_index)
    total_covmat = theory_covmat_custom + procs_covmat
    total_unc = relative_uncertainty(total_covmat).reindex(ordered_index)

    fig, ax = plotutils.subplots(figsize=(20, 10))
    series_to_draw = (
        (exp_unc, "Experiment", "orange"),
        (theory_unc, "Theory", "red"),
        (total_unc, "Total", "blue"),
    )
    for series, legend_entry, colour in series_to_draw:
        ax.plot(series.values, ".", label=legend_entry, color=colour)

    ticklocs, ticklabels, startlocs = matrix_plot_labels(theory_unc)
    ax.set_xticks(ticklocs)
    ax.set_xticklabels(ticklabels, rotation=45, fontsize=20)
    # Move the separators half a unit to the left so they fall between points.
    separators = [start - 0.5 for start in startlocs]
    ax.vlines(separators, 0, len(abs_data), linestyles="dashed")
    ax.set_ylabel(r"$\frac{\sqrt{S_{ii}}}{|D_i|}$", fontsize=30)
    ax.yaxis.set_tick_params(labelsize=20)
    ax.set_ylim([0, 0.5])
    ax.set_title(
        f"Square root of diagonal elements of covariance matrices ({n_points} pt), "
        + "normalised to absolute value of data",
        fontsize=20,
    )
    ax.legend(fontsize=20)
    ax.margins(x=0)
    return fig
# Results of ``theory_covmat_custom`` gathered with reportengine's ``collect``
# over ``dataspecs`` (presumably one covariance matrix per dataspec entry; the
# plotting functions that take this argument pair it with ``dataspecs`` via ``zip``).
theory_covmat_custom_dataspecs = collect(theory_covmat_custom, ("dataspecs",))
@figure
def plot_diag_cov_comparison_by_process(
    theory_covmat_custom_dataspecs, procs_covmat, procs_data_values, dataspecs
):
    """Plot of sqrt(cov_ii)/|data_i| for cov = exp, theory, exp+theory, by process

    dataspecs here is used also to specify whether to plot or not the total
    uncertainty, defined as the experimental uncertainty plus the uncertainty
    coming from the theory covariance matrix for the relevant dataspec entry.

    Parameters
    ----------
    theory_covmat_custom_dataspecs : sequence
        Theory covariance matrices, iterated in step with ``dataspecs``.
    procs_covmat
        Experimental covariance matrix.
    procs_data_values
        Data values; their index (entries sortable with ``_get_key``) fixes
        the ordering of the points.
    dataspecs : sequence of mappings
        Each entry provides ``"speclabel"`` (legend label) and
        ``"plot_total"`` (whether the exp + theory curve is drawn).

    Returns
    -------
    fig
        The figure created through ``plotutils.subplots``.
    """
    fig, ax = plotutils.subplots(figsize=(20, 10))
    # sort by theory covmat processes
    newindex = sorted(procs_data_values.index, key=_get_key)
    data = np.abs(procs_data_values)
    sqrtdiags_exp = (np.sqrt(np.diag(procs_covmat)) / data).reindex(newindex)
    ax.plot(sqrtdiags_exp.values, "*", markersize=4, label="Experimental uncertanties")
    # loop on th covmat
    for specs, th_covmat in zip(dataspecs, theory_covmat_custom_dataspecs):
        label = specs["speclabel"]
        sqrtdiags_th = (np.sqrt(np.diag(th_covmat)) / data).reindex(newindex)
        ax.plot(sqrtdiags_th.values, "o", markersize=4, label=label)
        if specs["plot_total"]:
            df_total = th_covmat + procs_covmat
            sqrtdiags_tot = (np.sqrt(np.diag(df_total)) / data).reindex(newindex)
            # Draw the relative total uncertainty, not the summed covariance
            # matrix itself (which would be plotted column by column).
            ax.plot(
                sqrtdiags_tot.values,
                "v",
                markersize=4,
                label=f"Experimental uncertanties + {label}",
            )

    # Every plotted series shares ``newindex``; using the experimental one keeps
    # the labelling independent of whether the loop above ran at all.
    ticklocs, ticklabels, startlocs = matrix_plot_labels(sqrtdiags_exp)
    ax.set_xticks(ticklocs)
    ax.set_xticklabels(ticklabels, rotation=45, fontsize=20)
    # Shift startlocs elements 0.5 to left so lines are between indexes
    startlocs_lines = [x - 0.5 for x in startlocs]
    ax.vlines(startlocs_lines, 0, len(data), linestyles="dashed")
    ax.set_ylabel(r"$\frac{\sqrt{S_{ii}}}{|D_i|}$", fontsize=30)
    ax.yaxis.set_tick_params(labelsize=20)
    ax.set_ylim([0, 0.5])
    ax.legend(fontsize=20)
    ax.margins(x=0)
    return fig
@figure
def plot_diag_cov_comparison_by_experiment(
    theory_covmat_custom_dataspecs,
    experiments_covmat_no_table,
    procs_data_values_experiment,
    dataspecs,
):
    """Plot of sqrt(cov_ii)/|data_i| for cov = exp, theory, exp+theory, by experiment

    Parameters
    ----------
    theory_covmat_custom_dataspecs : sequence
        Theory covariance matrices, iterated in step with ``dataspecs``.
        The outermost level of their index is dropped before they are
        matched to the data ordering.
    experiments_covmat_no_table : pandas.DataFrame
        Experimental covariance matrix; its diagonal is sorted by index
        before being divided by the data.
    procs_data_values_experiment
        Data values. A sorted copy is used, so the argument is not modified.
    dataspecs : sequence of mappings
        Each entry provides ``"speclabel"`` (legend label) and
        ``"plot_total"`` (whether the exp + theory curve is drawn).

    Returns
    -------
    fig
        The figure created through ``plotutils.subplots``.
    """
    fig, ax = plotutils.subplots(figsize=(20, 10))
    # Sort a copy: an in-place sort would reorder the caller's object.
    data_values = procs_data_values_experiment.sort_index(level=0)
    data = np.abs(data_values)
    plot_index = data_values.index
    # plot exp values, take diagonal first
    sqrtdiags_exp = pd.DataFrame(
        np.sqrt(np.diag(experiments_covmat_no_table)), index=experiments_covmat_no_table.index
    )
    sqrtdiags_exp.sort_index(level=0, inplace=True)
    # Positional division: relies on both objects being sorted the same way.
    sqrtdiags_exp = sqrtdiags_exp[0] / data.values
    ax.plot(sqrtdiags_exp.values, "*", markersize=4, label="Experimental uncertanties")
    # Data ordering without the outermost index level; the same for every dataspec.
    index_nogroup = plot_index.droplevel(0)
    # loop on th covmat
    for specs, th_covmat in zip(dataspecs, theory_covmat_custom_dataspecs):
        label = specs["speclabel"]
        sqrtdiags_th = pd.DataFrame(
            np.sqrt(np.diag(th_covmat)), index=th_covmat.index.droplevel(0)
        )
        # sort the diag th covmat by experiments
        sqrtdiags_th = sqrtdiags_th.reindex(index_nogroup)
        sqrtdiags_th = sqrtdiags_th[0].values / data
        ax.plot(sqrtdiags_th.values, "o", markersize=4, label=label)
        if specs["plot_total"]:
            df_total = np.sqrt(sqrtdiags_th**2 + sqrtdiags_exp**2)
            ax.plot(
                df_total.values, "v", markersize=4, label=f"Experimental uncertanties + {label}"
            )

    # ``data`` carries the index used for the theory points, so the labels do
    # not depend on whether the loop above ran at all.
    ticklocs, ticklabels, startlocs = matrix_plot_labels(data)
    ax.set_xticks(ticklocs)
    ax.set_xticklabels(ticklabels, rotation=45, fontsize=20)
    # Shift startlocs elements 0.5 to left so lines are between indexes
    startlocs_lines = [x - 0.5 for x in startlocs]
    ax.vlines(startlocs_lines, 0, len(data), linestyles="dashed")
    ax.set_ylabel(r"$\frac{\sqrt{S_{ii}}}{|D_i|}$", fontsize=30)
    ax.yaxis.set_tick_params(labelsize=20)
    ax.set_ylim([0, 0.5])
    ax.legend(fontsize=20)
    ax.margins(x=0)
    return fig