# -*- coding: utf-8 -*-
"""
Utilities for computing correlations in batch.
@author: Zahari Kassabov
"""
import numpy as np
import numpy.linalg as la
from reportengine import collect
from validphys.checks import check_pdf_is_montecarlo
from validphys.core import Stats
# This would be a good candidate to be optimized to calculate everything in one
# pass over x,
def _basic_obs_pdf_correlation(pdf_val, obs_val):
"""Calculate the correlation between pdfs and observables.
The expected format is two arrays
obs_val: (nbin x nreplicas) as returned from thresults.error_members
pdf_val: (nreplicas x nf x nf) as returned from xplotting_grid.grid_values.error_members
The returned array contains the PDF correlation between
the value of the obsevable and the PDF at the corresponding point in (fl,x)
space. The dimensions are:
(nbins x nf x nx), compatible with grid_values, upon
changing replicas->bins.
"""
x = pdf_val - np.mean(pdf_val, axis=0)
y = (obs_val - np.mean(obs_val, axis=-1, keepdims=True)).T
# We want to compute:
# sum(x*y)/(norm(x)*norm(y))
# broadcast to the appropriate dimensions
num = np.einsum('ij,ikm->jkm', y, x)
xnorm = la.norm(x, axis=0)
ynorm = la.norm(y, axis=0)
# like np.outer, but keeping the right shape
den = np.einsum('i,jk->ijk', ynorm, xnorm)
return num / den
def _basic_obs_obs_correlation(obs1, obs2):
"""Calculate the correlation between two observables. The expected format is
arrays instances of:
obs1: (nbins, nreplicas)
obs2: (nbins, nreplicas)
The result is (nbins1 , nbins2), a mareix containing the correlation
coefficients between the two sets.
"""
x = obs1 - np.mean(obs1, axis=1, keepdims=True)
y = (obs2 - np.mean(obs2, axis=1, keepdims=True)).T
return x @ y / np.outer(la.norm(x, axis=1), la.norm(y, axis=0))
[docs]@check_pdf_is_montecarlo
def obs_pdf_correlations(pdf, results, xplotting_grid):
"""Return the correlations between each point in a dataset and the PDF
values on a grid of (x,f) points in a format similar to `xplotting_grid`."""
_, th = results
# Wrap the result in a standard Stats class
# since the result is (npoints, flavours, ndata) and has nothing to do with the PDF replicas
pdf_val = xplotting_grid.grid_values.error_members()
obs_val = th.error_members
corrs = Stats(_basic_obs_pdf_correlation(pdf_val, obs_val))
return xplotting_grid.copy_grid(grid_values=corrs)
corrpair_results = collect("results", ["corrpair"])
corrpair_datasets = collect("dataset", ["corrpair"])
[docs]@check_pdf_is_montecarlo
def obs_obs_correlations(pdf, corrpair_results):
"""Return the theoretical correlation matrix between a pair of observables."""
(_, th1), (_, th2) = corrpair_results
return _basic_obs_obs_correlation(th1.error_members, th2.error_members)