# -*- coding: utf-8 -*-
"""
Plots and analysis tools for SIMUnet.
"""
from __future__ import generator_stop
import logging
import numpy as np
import numpy.linalg as la
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import ListedColormap
from matplotlib.ticker import MultipleLocator
import matplotlib.image as image
from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
import matplotlib.colors as colors
import pandas as pd
import seaborn as sns
import itertools
from reportengine.figure import figure, figuregen
from reportengine.checks import make_check, CheckError, make_argcheck, check
from reportengine import collect
from reportengine.table import table
from reportengine.floatformatting import format_number
from validphys import plotutils
from validphys.fitdata import replica_paths
from validphys.fitdata import read_bsm_facs
from validphys.plotutils import grey_centre_cmap
from validphys.pdfbases import PDG_PARTONS
from validphys.utils import split_ranges
from validphys.loader import Loader
from validphys.n3fit_data_utils import parse_simu_parameters_names_CF
from validphys.loader import _get_nnpdf_profile
from validphys.convolution import central_predictions
log = logging.getLogger(__name__)
l = Loader()
"""
Format routines
---------------
"""
BSM_FAC_DISPLAY = ['OtZ', 'OtW', 'OtG',
'Opt', 'O3pQ3', 'O3pq', 'OpQM', 'OpqMi', 'Opui', 'Opdi', 'O3pl', 'Opl', 'Ope',
'O1qd', 'O1qu', 'O1dt', 'O1qt', 'O1ut', 'O11qq', 'O13qq',
'O8qd', 'O8qu', 'O8dt', 'O8qt', 'O8ut', 'O81qq', 'O83qq',
'OQt8', 'OQQ1', 'OQQ8', 'OQt1', 'Ott1', 'Oeu', 'Olu', 'Oed',
'Olq3', 'Olq1', 'Oqe', 'Old', 'Oll', 'Omup', 'Otap', 'Otp',
'Obp', 'Ocp', 'OG', 'OWWW', 'OpG', 'OpW', 'OpB', 'OpWB', 'Opd', 'OpD',]
[docs]def reorder_cols(cols):
"""
Reorders columns based on predefined BSM factor display order.
Parameters
----------
cols : list
List of column names to be reordered.
Returns
-------
list
Reordered list of columns.
"""
return sorted(cols, key=BSM_FAC_DISPLAY.index)
"""
---------------
"""
[docs]@figuregen
def plot_nd_bsm_facs(read_bsm_facs, bsm_names_to_latex, posterior_plots_settings=None):
"""
Plot a histogram for each BSM coefficient.
The nd is used for n-dimensional, if two BSM facs are present: use instead :py:func:`validphys.results.plot_2d_bsm_facs`
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing the BSM factors.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
posterior_plots_settings : dict, optional
Settings for posterior plots, such as number of bins and range settings.
Yields
------
fig : matplotlib.figure.Figure
A matplotlib figure object for the histogram.
"""
# extract settings
if posterior_plots_settings is None:
n_bins = 10
rangex = None
rangey = None
else:
try:
n_bins = posterior_plots_settings["n_bins"]
except KeyError:
n_bins = 10
try:
rangex = posterior_plots_settings["rangex"]
except KeyError:
rangex = None
try:
rangey = posterior_plots_settings["rangey"]
except KeyError:
rangey = None
for label, column in read_bsm_facs.items():
# TODO: surely there is a better way
fig, ax = plt.subplots()
ax.hist(column.values, density=True, bins=n_bins)
ax.ticklabel_format(axis='x', style='sci', scilimits=(0,0))
ax.set_title(f"Distribution for {bsm_names_to_latex[label]} coefficient")
ax.set_ylabel("Prob. density", fontsize=14)
ax.set_xlabel(bsm_names_to_latex[label] + r"$/\Lambda^2$ [TeV$^{-2}$]", fontsize=16)
ax.grid(False)
if rangex is not None:
ax.set_xlim(rangex)
if rangey is not None:
ax.set_ylim(rangey)
yield fig
[docs]@figuregen
def plot_nd_bsm_facs_fits(fits, bsm_names_to_latex, posterior_plots_settings=None):
"""
Compare histograms of BSM factors between different fits in SIMUnet.
Parameters
----------
fits : NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
Dictionary mapping BSM names to their LaTeX representations.
posterior_plots_settings : dict, optional
Dictionary containing settings for posterior plots such as 'n_bins', 'rangex', 'rangey', and 'same_bins'.
Yields
------
fig : matplotlib.figure.Figure
A matplotlib figure object for each BSM coefficient comparison.
"""
# extract settings
if posterior_plots_settings is None:
same_bins = False
n_bins = 10
rangex = None
rangey = None
else:
try:
same_bins = posterior_plots_settings["same_bins"]
except KeyError:
same_bins = False
try:
n_bins = posterior_plots_settings["n_bins"]
except KeyError:
n_bins = 10
try:
rangex = posterior_plots_settings["rangex"]
except KeyError:
rangex = None
try:
rangey = posterior_plots_settings["rangey"]
except KeyError:
rangey = None
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.append(bsm_fac_ops)
# Remove repeated operators
all_ops = {o for fit_ops in all_ops for o in fit_ops}
# If same_bins=True, create binnings
if same_bins:
min_bins = pd.Series(dict(zip(list(all_ops), np.full(len(all_ops), np.inf))))
max_bins = pd.Series(dict(zip(list(all_ops), np.full(len(all_ops), -np.inf))))
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
min_df = bsm_facs_df.min()
max_df = bsm_facs_df.max()
min_bins = pd.concat([min_bins, min_df], axis=1).min(axis=1)
max_bins = pd.concat([max_bins, max_df], axis=1).max(axis=1)
# plot all operators
for op in all_ops:
fig, ax = plt.subplots()
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
if same_bins:
bins = np.linspace(min_bins.loc[op], max_bins.loc[op], n_bins)
else:
bins = n_bins
if bsm_facs_df.get([op]) is not None:
ax.hist(bsm_facs_df.get([op]).values, bins=bins, density=True, alpha=0.5, label=fit.label)
ax.ticklabel_format(axis='x', style='sci', scilimits=(0,0))
ax.set_ylabel("Prob. density", fontsize=14)
if bsm_names_to_latex is None:
ax.set_xlabel(op, fontsize=14)
else:
ax.set_xlabel(bsm_names_to_latex[op] + r"$/\Lambda^2$ [TeV$^{-2}$]", fontsize=16)
ax.legend()
ax.grid(False)
if rangex is not None:
ax.set_xlim(rangex)
if rangey is not None:
ax.set_ylim(rangey)
yield fig
[docs]@figuregen
def plot_kde_bsm_facs(read_bsm_facs, bsm_names_to_latex):
"""
Plots the kernel estimation density for a distribution of BSM coefficients.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing the BSM factors.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
Yields
------
fig : matplotlib.figure.Figure
A matplotlib figure object for the KDE plot.
"""
for label, column in read_bsm_facs.items():
# Initialise Axes instance
fig, ax = plt.subplots()
# populate the Axes with the KDE
ax = plotutils.kde_plot(column.values)
# Format of the plot
ax.ticklabel_format(axis='x', style='sci', scilimits=(0,0))
ax.set_title(f"KDE for {bsm_names_to_latex[label]} coefficient")
ax.set_ylabel("Prob. density", fontsize=14)
ax.set_xlabel(bsm_names_to_latex[label] + r"$/\Lambda^2$ [TeV$^{-2}]$", fontsize=14)
ax.grid(True)
yield fig
@make_argcheck
def _check_two_bsm_facs(fit):
cf = fit.as_input().get("simu_parameters", [])
l = len(cf)
check(
l == 2,
"Exactly two elements are required in "
f"`simu_parameters` for fit '{fit}', but {l} found.",
)
[docs]@figure
#@_check_two_bsm_facs
def plot_2d_bsm_facs(read_bsm_facs, replica_data):
"""
Plot two-dimensional distributions of the BSM coefficient results.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing the BSM factors.
replica_data : list
List of FitInfo.
Returns
-------
fig : matplotlib.figure.Figure
A matplotlib figure object for the 2D distribution plot.
"""
bsm_facs_df = read_bsm_facs
labels = bsm_facs_df.columns
chi2 = [info.chi2 for info in replica_data]
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
chi2 = [info.chi2 for info in replica_data]
scatter_plot = ax.scatter(
bsm_facs_df.iloc[:, 0], bsm_facs_df.iloc[:, 1], c=chi2
)
# create new axes to the bottom of the scatter plot
# for the colourbar
divider = make_axes_locatable(ax)
# the width of the colorbar can be changed with `size`
cax = divider.append_axes("bottom", size="8%", pad=0.7)
fig.colorbar(scatter_plot, cax=cax, label=r"$\chi^2$", orientation='horizontal')
# set scientific notation for thei scatter plot
ax.ticklabel_format(
axis='both', scilimits=(0, 0), style='sci', useOffset=True
)
# append axes to the top and to the right for the histograms
ax_histx = divider.append_axes("top", 0.5, pad=0.5, sharex=ax)
ax_histy = divider.append_axes("right", 0.5, pad=0.3, sharey=ax)
# Make some labels invisible
ax_histx.xaxis.set_tick_params(labelbottom=False)
ax_histy.yaxis.set_tick_params(labelleft=False)
# populate the histograms
ax_histx.hist(bsm_facs_df.iloc[:, 0])
ax_histy.hist(bsm_facs_df.iloc[:, 1], orientation='horizontal')
ax_histx.grid(False)
ax_histy.grid(False)
ax.set_xlabel(labels[0])
ax.set_ylabel(labels[1])
ax.set_axisbelow(True)
return fig
def _select_plot_2d_bsm_facs(read_bsm_facs, replica_data, bsm_names_to_latex, pair):
"""
Auxiliary function to plot 2D plots of pair of operators in a N-dimensional fits with BSM factors.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
replica_data : list
List of FitInfo.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
pair : tuple
Pair of operators to be plotted.
Returns
-------
fig : matplotlib.figure.Figure
A matplotlib figure object for the 2D plot.
"""
op_1, op_2 = pair
bsm_facs_df = read_bsm_facs
if op_1 != op_2:
bsm_facs_df = bsm_facs_df[[op_1, op_2]]
labels = bsm_facs_df.columns
else:
bsm_facs_df = bsm_facs_df[[op_1]]
labels = [bsm_facs_df.columns]*2
chi2 = [info.chi2 for info in replica_data]
# we use this figsize to have a square scatter plot
# smaller values do not display too well
fig, ax = plt.subplots(1, 1, figsize=(7, 7))
chi2 = [info.chi2 for info in replica_data]
if op_1 != op_2:
scatter_plot = ax.scatter(
bsm_facs_df.iloc[:, 0], bsm_facs_df.iloc[:, 1], c=chi2, s=40
)
else:
scatter_plot = ax.scatter(
bsm_facs_df.values, bsm_facs_df.values, c=chi2, s=40
)
# create new axes to the bottom of the scatter plot
# for the colourbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("bottom", size="9%", pad=0.7)
fig.colorbar(scatter_plot, cax=cax, label=r"$\chi^2$", orientation='horizontal')
# set scientific notation for thei scatter plot
ax.ticklabel_format(
axis='both', scilimits=(0, 0), style='sci', useOffset=True
)
# append axes to the top and to the right for the histograms
ax_histx = divider.append_axes("top", 0.5, pad=0.5, sharex=ax)
ax_histy = divider.append_axes("right", 0.5, pad=0.3, sharey=ax)
# Make some labels invisible
ax_histx.xaxis.set_tick_params(labelbottom=False)
ax_histy.yaxis.set_tick_params(labelleft=False)
# populate the histograms
ax_histx.hist(bsm_facs_df.iloc[:, 0], density=True)
if op_1 != op_2:
ax_histy.hist(bsm_facs_df.iloc[:, 1], orientation='horizontal', density=True)
else:
ax_histy.hist(bsm_facs_df.iloc[:, 0], orientation='horizontal', density=True)
ax_histx.grid(False)
ax_histy.grid(False)
ax.set_xlabel(bsm_names_to_latex[labels[0]] + r"$/\Lambda^2$ [TeV$^{-2}]$", fontsize=15)
ax.set_ylabel(bsm_names_to_latex[labels[1]] + r"$/\Lambda^2$ [TeV$^{-2}]$", fontsize=15)
ax.set_axisbelow(True)
return fig
[docs]@figuregen
def plot_bsm_2d_combs(read_bsm_facs, replica_data, bsm_names_to_latex):
"""
Plot two-dimensional distributions for all pairs of BSM coefficients in a fit.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
replica_data : list
List of FitInfo.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
Yields
------
fig : matplotlib.figure.Figure
A matplotlib figure object for each pair of BSM coefficient combination.
"""
bsm_facs_df = read_bsm_facs
labels = bsm_facs_df.columns
combs = itertools.combinations(labels, 2)
for comb in combs:
fig = _select_plot_2d_bsm_facs(bsm_facs_df, replica_data, bsm_names_to_latex, pair=comb)
yield fig
[docs]@figure
def plot_chi2_bsm_facs(read_bsm_facs, replica_data):
"""
Generates bsm_fac value - chi2 scatter plots for all replicas in a fit.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
replica_data : list
List of FitInfo.
Returns
-------
fig : matplotlib.figure.Figure
A matplotlib figure object for the chi2 scatter plot.
"""
chi2 = [info.chi2 for info in replica_data]
for label, column in read_bsm_facs.iteritems():
fig, ax = plt.subplots()
ax.scatter(
column, chi2, s=40, alpha=0.8
)
# set scientific notation for the scatter plot
ax.ticklabel_format(
axis='both', scilimits=(0, 0), style='sci', useOffset=True
)
ax.set_xlabel(label)
ax.set_ylabel(r"$\chi^2$")
ax.set_axisbelow(True)
ax.grid(True)
return fig
[docs]@figuregen
def plot_tr_val_epoch(fit, replica_paths):
"""
Plot the average across replicas of training and validation chi2
for a given epoch.
Parameters
----------
fit : FitSpec
Object containing the specifications of the fit.
replica_paths : list
List of paths to the replica data.
Yields
------
fig : matplotlib.figure.Figure
The figure object containing the plot.
"""
paths = [p / 'chi2exps.log' for p in replica_paths]
# initialise dataframe
all_cols = pd.concat([pd.read_json(i).loc['total'] for i in paths], axis=1)
# get training and validation data
tr_data = all_cols.applymap(lambda x: x['training'], na_action='ignore')
val_data = all_cols.applymap(lambda x: x['validation'], na_action='ignore')
tr_chi2 = tr_data.mean(axis=1)
val_chi2 = val_data.mean(axis=1)
fig, ax = plt.subplots()
# formatting
ax.plot(tr_chi2.index, tr_chi2, label=r'Training $\chi^2$')
ax.plot(val_chi2.index, val_chi2, label=r'Validation $\chi^2$')
ax.legend()
ax.grid(True)
ax.set_title(fit.label)
ax.set_xlabel('Epoch')
ax.set_ylabel(r'$\chi^2$', rotation='horizontal', labelpad=10.0)
yield fig
[docs]@table
def bsm_facs_bounds(read_bsm_facs, bsm_names_to_latex):
"""
Table generator to summarise information about the BSM coefficient results.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
Returns
-------
pd.DataFrame
DataFrame summarizing the bounds and statistics of BSM coefficients.
"""
bsm_facs_df = read_bsm_facs
# Get the numbers from the dataframe
means = bsm_facs_df.mean()
stds = bsm_facs_df.std()
cl68_lower, cl68_upper = (means - stds, means + stds)
cl95_lower, cl95_upper = (means - 2 * stds, means + 2 * stds)
# Format the numbers to display
means_disp = display_format(means)
stds_disp = display_format(stds)
cl68_lower_disp = display_format(cl68_lower)
cl68_upper_disp = display_format(cl68_upper)
cl95_lower_disp = display_format(cl95_lower)
cl95_upper_disp = display_format(cl95_upper)
# fill the dataframe
df = pd.DataFrame(index=bsm_facs_df.columns)
df.index = [bsm_names_to_latex[i] for i in df.index]
df['68% CL bounds'] = list(zip(cl68_lower_disp, cl68_upper_disp))
df['95% CL bounds'] = list(zip(cl95_lower_disp, cl95_upper_disp))
df['Mean'] = means_disp
df['Std'] = stds_disp
return df
[docs]@table
def tabulate_bsm_corr(fit, read_bsm_facs):
"""
Generate a correlation table for BSM coefficients similar to the corresponding plot.
Parameters
----------
fit : FitSpec
Object containing the specifications of the fit.
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
Returns
-------
pd.DataFrame
The correlation matrix as a DataFrame.
"""
bsm_facs_df = read_bsm_facs
bsm_facs_df = bsm_facs_df.reindex(columns=reorder_cols(bsm_facs_df.columns))
corr_mat = bsm_facs_df.corr()
round(corr_mat, 1)
return corr_mat
[docs]@figure
def plot_2d_bsm_facs_pair(read_bsm_facs, replica_data, bsm_names_to_latex, op1, op2):
"""
Auxiliary function to plot 2D plots of a pair of operators in a N-dimensional fits with BSM factors.
Parameters
----------
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
replica_data : list
List of FitInfo.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
op1 : str
The first operator name.
op2 : str
The second operator name.
Returns
-------
matplotlib.figure.Figure
The figure object containing the 2D plot.
"""
return _select_plot_2d_bsm_facs(read_bsm_facs, replica_data, bsm_names_to_latex, (op1, op2))
[docs]@figure
def plot_bsm_corr(fit, read_bsm_facs, bsm_names_to_latex, corr_threshold=0.5):
"""
Plot a correlation matrix to summarise information about the BSM coefficient results.
Parameters
----------
fit : FitSpec
Object containing the specifications of the fit.
read_bsm_facs : pd.DataFrame
DataFrame containing BSM factors.
bsm_names_to_latex : dict
Dictionary mapping BSM names to LaTeX representations.
corr_threshold : float, optional
Threshold for coloration in the correlation matrix.
Returns
-------
matplotlib.figure.Figure
The figure object containing the correlation matrix plot.
"""
# figsize (11, 9) has good proportions
fig, ax = plt.subplots(1, 1, figsize=(11, 9))
# set background colour
ax.set_facecolor("0.9")
# read dataframe and round numbers
bsm_facs_df = read_bsm_facs
bsm_facs_df = bsm_facs_df.reindex(columns=reorder_cols(bsm_facs_df.columns))
# note that bsm_names_to_latex can be None
if bsm_names_to_latex:
bsm_facs_df.columns = [bsm_names_to_latex[col] for col in bsm_facs_df.columns]
corr_mat = bsm_facs_df.corr()
round(corr_mat, 1)
# create colourmap with gret in the centre for colourbar
new_cmap = grey_centre_cmap(frac=corr_threshold)
# formatting
ax.xaxis.tick_top() # x axis on top
ax.xaxis.set_label_position('top')
ax.set_title(fit.label, fontsize=20, pad=20)
# create heatmap
ax = sns.heatmap(corr_mat,
vmin=-1.0, vmax=1.0, linewidths=.5, square=True, cmap=new_cmap);
return fig
[docs]@figuregen
def plot_bsm_pdf_corr(
pdf,
read_bsm_facs,
xplotting_grid,
Q,
bsm_names_to_latex,
mark_threshold: float = 0.9,
ymin: (float, type(None)) = None,
ymax: (float, type(None)) = None,
dashed_line_flavours: (list, type(None)) = None,
):
"""
Plot the correlation between BSM factors and a PDF.
Parameters
----------
pdf : PDF object
The parton distribution function being analyzed.
read_bsm_facs : DataFrame or callable
Data or function representing BSM factors.
xplotting_grid : XGrid object
Object containing the plotting grid information.
Q : float
Momentum transfer value in GeV.
bsm_names_to_latex : dict
Mapping of BSM factor names to their LaTeX representations.
mark_threshold : float, optional
Threshold for marking significant correlations, by default 0.9.
ymin : float or None, optional
Minimum y-axis value, by default None.
ymax : float or None, optional
Maximum y-axis value, by default None.
dashed_line_flavours : list of str or None, optional
List of flavours to be plotted with dashed lines, by default None.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for the plot.
bsm_fac : str
Name of the BSM factor being plotted.
"""
# read dataframe
bsm_facs_df = read_bsm_facs
# reorder BSM facs
bsm_facs_df = bsm_facs_df.reindex(columns=reorder_cols(bsm_facs_df.columns))
# get xplotting_grid
# x_grid_obj = xplotting_grid(pdf, Q, basis=Basespecs[0]["basis"])
x_grid_obj = xplotting_grid
if dashed_line_flavours is None:
dashed_line_flavours = []
for bsm_fac in bsm_facs_df.columns:
# get the values of the BSM factors
bsm_fac_vals = bsm_facs_df[bsm_fac].values
# Initialise axes
fig, ax = plt.subplots()
# Define xgrid and scale
xgrid = x_grid_obj.xgrid
scale = x_grid_obj.scale
# get grid values
gv = x_grid_obj.grid_values.error_members()
for index, flavour in enumerate(x_grid_obj.flavours):
flavour_label = flavour
parton_grids = gv[:, index, ...]
# calculate correlation
num = np.mean(bsm_fac_vals.reshape(-1, 1) * parton_grids, axis=0) - np.mean(parton_grids, axis=0) * np.mean(bsm_fac_vals)
den = np.sqrt(np.mean(bsm_fac_vals**2) - np.mean(bsm_fac_vals)**2) * np.sqrt(np.mean(parton_grids**2, axis=0)- np.mean(parton_grids, axis=0)**2)
corr = num / den
if flavour in dashed_line_flavours:
style = "--"
else:
style = "-"
ax.plot(xgrid, corr, style, label=fr'${flavour_label}$')
# Plot threshold
mask = np.abs(corr) > mark_threshold
ranges = split_ranges(xgrid, mask, filter_falses=True)
for r in ranges:
ax.axvspan(r[0], r[-1], color='#eeeeff')
ax.set_xscale(scale)
ax.set_xlim(xgrid[0], xgrid[-1])
ax.set_xlabel(r'$x$')
ax.set_title(f'Correlation {bsm_names_to_latex[bsm_fac]} - {pdf.label}\nQ = {Q} GeV')
ax.set_ylim(ymin, ymax)
ax.legend(loc="best")
ax.grid(True)
#ax.set_axisbelow(True)
#ax.set_adjustable("datalim")
yield fig, bsm_fac
[docs]@figuregen
def plot_bsm_pdf_corr_fits(fits, pdfs, xplotting_grids, Q, bsm_names_to_latex):
"""
Plot correlations between BSM factors and multiple PDFs.
Parameters
----------
fits : NSList
List of FitSpec to be compared.
pdfs : list of PDF objects
List of parton distribution functions corresponding to the fits.
xplotting_grids : list of XGrid objects
List of plotting grid objects corresponding to each fit.
Q : float
Momentum transfer value in GeV.
bsm_names_to_latex : dict
Mapping of BSM factor names to their LaTeX representations.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for the plot.
"""
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.append(bsm_fac_ops)
# Remove repeated operators
all_ops = reorder_cols({o for fit_ops in all_ops for o in fit_ops})
# plot correlation per operator
# if an operator is not in the fit then it is
# simply not plotted
for bsm_fac in all_ops:
# Initialise axes
fig, ax = plt.subplots()
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
# get PDF
pdf = pdfs[fits.index(fit)]
# get x_object
x_grid_obj = xplotting_grids[fits.index(fit)]
if bsm_facs_df.get([bsm_fac]) is not None:
bsm_fac_vals = bsm_facs_df[bsm_fac].values
# Define xgrid and scale
xgrid = x_grid_obj.xgrid
scale = x_grid_obj.scale
# get grid values
gv = x_grid_obj.grid_values.error_members()
for flavour in x_grid_obj.flavours:
flavour_label = flavour
index = tuple(x_grid_obj.flavours).index(flavour)
parton_grids = gv[:, index, ...]
# calculate correlation
num = np.mean(bsm_fac_vals.reshape(-1, 1) * parton_grids, axis=0) - np.mean(parton_grids, axis=0) * np.mean(bsm_fac_vals)
den = np.sqrt(np.mean(bsm_fac_vals**2) - np.mean(bsm_fac_vals)**2) * np.sqrt(np.mean(parton_grids**2, axis=0)- np.mean(parton_grids, axis=0)**2)
corr = num / den
ax.plot(xgrid, corr, label=fr'$\rho({flavour_label},$ ' + bsm_names_to_latex[bsm_fac] + f') {pdf.label}')
ax.set_xscale(scale)
ax.set_xlabel(r'$x$')
ax.set_title(f'Correlation {bsm_names_to_latex[bsm_fac]} - PDFs ' + f'(Q = {Q} GeV)')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.17))
ax.grid(True)
ax.set_axisbelow(True)
ax.set_adjustable("datalim")
yield fig
[docs]@figuregen
def plot_2d_bsm_facs_fits(fits, bsm_names_to_latex):
"""
Generate 2D scatter plots and histograms comparing BSM factor values across different fits.
This function takes a set of fits and compares the BSM factors between them. For each pair
of BSM factors, it creates a 2D scatter plot with corresponding histograms on the x and y axes.
Parameters
----------
fits : NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
Dictionary mapping BSM factor names to their LaTeX string representations.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for each pair of BSM factors.
"""
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.append(bsm_fac_ops)
# Remove repeated operators
all_ops = {o for fit_ops in all_ops for o in fit_ops}
# get all pairs
pairs = itertools.combinations(all_ops, 2)
# plot all pairs of operators
for pair in pairs:
op_1, op_2 = pair
# use this size to keep them sqaure
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.ticklabel_format(
axis='both', scilimits=(0, 0), style='sci', useOffset=True
)
divider = make_axes_locatable(ax)
# append axes to the top and to the right for the histograms
ax_histx = divider.append_axes("top", 0.5, pad=0.5, sharex=ax)
ax_histy = divider.append_axes("right", 0.5, pad=0.3, sharey=ax)
# Make some labels invisible
ax_histx.xaxis.set_tick_params(labelbottom=False)
ax_histy.yaxis.set_tick_params(labelleft=False)
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
# display the result in the figure only if the fit has the two operators in the pair
if bsm_facs_df.get([op_1]) is not None and bsm_facs_df.get([op_2]) is not None:
ax.scatter(
bsm_facs_df.get([op_1]), bsm_facs_df.get([op_2]), label=fit.label, alpha=0.5, s=40
)
# populate the histograms
ax_histx.hist(bsm_facs_df.get([op_1]), alpha=0.5, density=True)
ax_histy.hist(bsm_facs_df.get([op_2]), orientation='horizontal', alpha=0.5, density=True)
ax_histx.grid(False)
ax_histy.grid(False)
ax.set_xlabel(bsm_names_to_latex[op_1] + r"$/\Lambda^2$ [TeV$^{-2}]$", fontsize=14)
ax.set_ylabel(bsm_names_to_latex[op_2] + r"$/\Lambda^2$ [TeV$^{-2}]$", fontsize=14)
ax.legend()
ax.set_axisbelow(True)
yield fig
[docs]@table
def bsm_facs_bounds_fits(fits, bsm_names_to_latex, n_sigma=2):
"""
Generate a table summarizing the bounds of BSM coefficients in different fits.
This function processes a list of fits, extracting the BSM coefficients and
summarizing their mean, standard deviation, and confidence levels. The confidence
levels are computed as mean ± n_sigma * std.
Parameters
----------
fits : NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
A dictionary mapping BSM factor names to their LaTeX representations.
n_sigma : int, optional
The multiplier for the standard deviation to define confidence level bounds, by default 2.
Returns
-------
pd.DataFrame
A pandas DataFrame containing the bounds for each BSM factor in each fit, along with
additional metrics like 'Best-fit shift' and 'Broadening'.
"""
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.extend(bsm_fac_ops)
all_ops = list(dict.fromkeys(all_ops))
fit_names = [fit.label for fit in fits]
extra_metrics = ['Best-fit shift', 'Broadening']
# include extra metrics in columns
fit_names.extend(extra_metrics)
# Initialise df
df = pd.DataFrame(index=all_ops, columns=fit_names)
# plot all operators
for op in all_ops:
best_fits = []
bound_lengths = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
if bsm_facs_df.get([op]) is not None:
values = bsm_facs_df[op]
mean = values.mean()
std = values.std()
cl_lower, cl_upper = (mean - n_sigma * std, mean + n_sigma * std)
lower_dis = format_number(cl_lower, digits=2)
upper_dis = format_number(cl_upper, digits=2)
df[fit.label].loc[op] = f"({lower_dis}, {upper_dis})"
# best-fit value
best_fits.append(mean)
# calculate bound length
length = cl_upper - cl_lower
bound_lengths.append(length)
else:
df[fit.label].loc[op] = 'Not in fit'
# if the operator is not in the fit, then append None
best_fits.append(np.nan)
bound_lengths.append(np.nan)
# best-fit shift column
df['Best-fit shift'].loc[op] = format_number(best_fits[0] - best_fits[1], digits=2)
# broadening column
curr_len, ref_len = bound_lengths
if ref_len > 0:
df['Broadening'].loc[op] = str(np.round((curr_len - ref_len) / ref_len * 100.0, decimals=2)) + '%'
else:
df['Broadening'].loc[op] = 'n/a'
# formatting columns
for column in df.columns[:2]:
if n_sigma == 1:
df = df.rename(columns={column: f'68% CL - {column}'})
elif n_sigma == 2:
df = df.rename(columns={column: f'95% CL - {column}'})
mapping = {df.columns[0]: '(Current) ' + df.columns[0],
df.columns[1]: '(Reference) ' + df.columns[1]}
df = df.rename(columns=mapping)
df.index = [bsm_names_to_latex[i] for i in df.index]
return df
[docs]@table
def bsm_facs_68bounds_fits(fits, bsm_names_to_latex,):
"""
Generate a table summarizing the 68% confidence level (CL) bounds for BSM factors from various fits.
This function processes a list of fits and utilizes `bsm_facs_bounds_fits` to calculate and
return the 68% CL bounds for BSM coefficients.
Parameters
----------
fits : NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
A dictionary mapping BSM factor names to their LaTeX representations.
Returns
-------
pd.DataFrame
A pandas DataFrame containing the 68% CL bounds for each BSM factor in each fit.
"""
return bsm_facs_bounds_fits(fits, bsm_names_to_latex, n_sigma=1)
[docs]@table
def bsm_facs_95bounds_fits(fits, bsm_names_to_latex):
"""
Generate a table summarizing the 95% confidence level (CL) bounds for BSM factors from various fits.
This function processes a list of fits and utilizes `bsm_facs_bounds_fits` to calculate and
return the 95% CL bounds for BSM coefficients.
Parameters
----------
fits : NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
A dictionary mapping BSM factor names to their LaTeX representations.
Returns
-------
pd.DataFrame
A pandas DataFrame containing the 95% CL bounds for each BSM factor in each fit.
"""
return bsm_facs_bounds_fits(fits, bsm_names_to_latex, n_sigma=2)
[docs]@figuregen
def plot_smefit_internal_comparison(bsm_names_to_latex, smefit_reference_1, smefit_reference_2, bsm_names_to_plot_scales, smefit_labels):
"""
Generates comparison plots between SMEFiT fits.
This function creates plots to compare two different SMEFiT fits. It plots the best fit values
and the bounds for BSM (Beyond the Standard Model) coefficients, allowing for an easy comparison
of the two fits. It supports both linear and symmetric logarithmic scales.
Parameters
----------
bsm_names_to_latex : dict
Dictionary mapping BSM factor names to their LaTeX representations.
smefit_reference_1 : list of dicts
List of dictionaries containing BSM coefficient information for the first SMEFiT reference.
smefit_reference_2 : list of dicts
List of dictionaries containing BSM coefficient information for the second SMEFiT reference.
bsm_names_to_plot_scales : dict
Dictionary to scale the BSM names for plotting.
smefit_labels : list of str
Labels for the SMEFiT references to be used in the plot.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for the comparison plot.
"""
# extract all operators in the SMEFiT fits
all_ops = []
for fit in [smefit_reference_1, smefit_reference_2]:
ops_list = []
for entry in fit:
ops_list += [entry['name']]
all_ops.append(ops_list)
# Remove repeated operators and reorder
all_ops = reorder_cols({o for fit_ops in all_ops for o in fit_ops})
# store the relevant values
bounds_dict = {}
best_fits_dict ={}
# Now extend the bounds_dict and best_fits_dict with SMEFiT stuff
bounds_1 = []
best_fits_1 = []
for op in all_ops:
best_fits_1 += [bsm_names_to_plot_scales[op]*smefit_reference_1[x]['best'] for x in range(len(smefit_reference_1)) if smefit_reference_1[x]['name'] == op]
bounds_1 += [[bsm_names_to_plot_scales[op]*smefit_reference_1[x]['lower_bound'], bsm_names_to_plot_scales[op]*smefit_reference_1[x]['upper_bound']] for x in range(len(smefit_reference_1)) if smefit_reference_1[x]['name'] == op]
bounds_dict[smefit_labels[0]] = bounds_1
best_fits_dict[smefit_labels[0]] = best_fits_1
# Now extend the bounds_dict and best_fits_dict with SMEFiT stuff
bounds_2 = []
best_fits_2 = []
for op in all_ops:
best_fits_2 += [bsm_names_to_plot_scales[op]*smefit_reference_2[x]['best'] for x in range(len(smefit_reference_2)) if smefit_reference_2[x]['name'] == op]
bounds_2 += [[bsm_names_to_plot_scales[op]*smefit_reference_2[x]['lower_bound'], bsm_names_to_plot_scales[op]*smefit_reference_2[x]['upper_bound']] for x in range(len(smefit_reference_2)) if smefit_reference_2[x]['name'] == op]
bounds_dict[smefit_labels[1]] = bounds_2
best_fits_dict[smefit_labels[1]] = best_fits_2
# plot parameters
scales= ['linear', 'symlog']
colour_key = ['#66C2A5', '#FC8D62', '#8DA0CB']
for scale in scales:
# initialise plots
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
# line for SM prediction
ax.axhline(y=0.0, color='k', linestyle='--', alpha=0.3, label='SM')
labels = smefit_labels
idx = 0
for label in labels:
bounds = bounds_dict[label]
best_fits = best_fits_dict[label]
x_coords = [i - 0.1 + 0.2*idx for i in range(len(all_ops))]
bounds_min = [bound[0] for bound in bounds]
bounds_max= [bound[1] for bound in bounds]
ax.scatter(x_coords, best_fits, color=colour_key[idx])
ax.vlines(x=x_coords, ymin=bounds_min, ymax=bounds_max, label='95% CL ' + label,
color=colour_key[idx], lw=2.0)
idx += 1
# set x positions for labels and labels
ax.set_xticks(np.arange(len(all_ops)))
bsm_latex_names = []
for op in all_ops:
if bsm_names_to_plot_scales[op] != 1:
bsm_latex_names += [str(bsm_names_to_plot_scales[op]) + '$\cdot$' + bsm_names_to_latex[op]]
else:
bsm_latex_names += [bsm_names_to_latex[op]]
ax.set_xticklabels(bsm_latex_names, rotation='vertical', fontsize=10)
# set y labels
ax.set_ylabel(r'$c_i / \Lambda^2 \ \ [ \operatorname{TeV}^{-2} ] $', fontsize=10)
# treatment of the symmetric log scale
if scale == 'symlog':
ax.set_yscale(scale, linthresh=0.1)
# turn off scientific notation
ax.yaxis.set_major_formatter(mticker.ScalarFormatter())
ax.yaxis.get_major_formatter().set_scientific(False)
y_values = [-100, -10, -1, -0.1, 0.0, 0.1, 1, 10, 100]
ax.set_yticks(y_values)
# get rid of scientific notation in y axis and
# get rid of '.0' for floats bigger than 1
ax.get_yaxis().set_major_formatter(mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',') if abs(x) >= 1 else x))
# treatment of linear scale
else:
ax.set_yscale(scale)
# final formatting
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15))
ax.grid(True)
ax.set_axisbelow(True)
ax.set_adjustable("datalim")
# Load image and add it to the plot
#file_name = "logo_black.png"
#logo = image.imread(file_name)
#The OffsetBox is a simple container artist.
#The child artists are meant to be drawn at a relative position to its #parent.
#imagebox = OffsetImage(logo, zoom = 0.15)
#Container for the imagebox referring to a specific position *xy*.
#ab = AnnotationBbox(imagebox, (20, -5), frameon = False)
#ax.add_artist(ab)
# frames on all sides
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
yield fig
[docs]@figuregen
def plot_smefit_comparison(fits, bsm_names_to_latex, smefit_reference, bsm_names_to_plot_scales, smefit_label):
"""
Generates plots comparing bounds obtained from SIMUnet fits with those obtained by SMEFiT.
This function takes a set of SIMUnet fits and a SMEFiT reference, extracting BSM coefficients
from each. It plots the mean and standard deviation of the BSM coefficients for each fit,
along with confidence levels calculated as mean ± 2*std. It supports both linear and symmetric
logarithmic scales for the plots.
Parameters
----------
fits: NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
Dictionary mapping BSM factor names to their LaTeX representations.
smefit_reference : list of dicts
List of dictionaries containing BSM coefficient information from a SMEFiT reference.
bsm_names_to_plot_scales : dict
Dictionary to scale the BSM names for plotting.
smefit_label : str
Label for the SMEFiT reference to be used in the plot.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for the comparison plot.
"""
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.append(bsm_fac_ops)
# Remove repeated operators and reorder
all_ops = reorder_cols({o for fit_ops in all_ops for o in fit_ops})
# store the relevant values
bounds_dict = {}
best_fits_dict ={}
for fit in fits:
bounds = []
best_fits = []
for op in all_ops:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
if bsm_facs_df.get([op]) is not None:
values = bsm_names_to_plot_scales[op]*bsm_facs_df[op]
mean = values.mean()
std = values.std()
cl_lower, cl_upper = (mean - 2*std, mean + 2*std)
# best-fit value
best_fits.append(mean)
# append bounds
bounds.append([cl_lower, cl_upper])
else:
# if the operator is not in the fit, then assume SM
best_fits.append(np.nan)
bounds.append([np.nan, np.nan])
bounds_dict[fit.label] = bounds
best_fits_dict[fit.label] = best_fits
# Now extend the bounds_dict and best_fits_dict with SMEFiT stuff
bounds = []
best_fits = []
for op in all_ops:
best_fits += [bsm_names_to_plot_scales[op]*smefit_reference[x]['best'] for x in range(len(smefit_reference)) if smefit_reference[x]['name'] == op]
bounds += [[bsm_names_to_plot_scales[op]*smefit_reference[x]['lower_bound'], bsm_names_to_plot_scales[op]*smefit_reference[x]['upper_bound']] for x in range(len(smefit_reference)) if smefit_reference[x]['name'] == op]
bounds_dict[smefit_label] = bounds
best_fits_dict[smefit_label] = best_fits
# plot parameters
scales= ['linear', 'symlog']
colour_key = ['#66C2A5', '#FC8D62', '#8DA0CB']
for scale in scales:
# initialise plots
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
# line for SM prediction
ax.axhline(y=0.0, color='k', linestyle='--', alpha=0.3, label='SM')
labels = [fit.label for fit in fits] + [smefit_label]
idx = 0
for label in labels:
bounds = bounds_dict[label]
best_fits = best_fits_dict[label]
x_coords = [i - 0.1 + 0.2*idx for i in range(len(all_ops))]
bounds_min = [bound[0] for bound in bounds]
bounds_max= [bound[1] for bound in bounds]
ax.scatter(x_coords, best_fits, color=colour_key[idx])
ax.vlines(x=x_coords, ymin=bounds_min, ymax=bounds_max, label='95% CL ' + label,
color=colour_key[idx], lw=2.0)
idx += 1
# set x positions for labels and labels
ax.set_xticks(np.arange(len(all_ops)))
bsm_latex_names = []
for op in all_ops:
if bsm_names_to_plot_scales[op] != 1:
bsm_latex_names += [str(bsm_names_to_plot_scales[op]) + '$\cdot$' + bsm_names_to_latex[op]]
else:
bsm_latex_names += [bsm_names_to_latex[op]]
ax.set_xticklabels(bsm_latex_names, rotation='vertical', fontsize=10)
# set y labels
ax.set_ylabel(r'$c_i / \Lambda^2 \ \ [ \operatorname{TeV}^{-2} ] $', fontsize=10)
# treatment of the symmetric log scale
if scale == 'symlog':
ax.set_yscale(scale, linthresh=0.1)
# turn off scientific notation
ax.yaxis.set_major_formatter(mticker.ScalarFormatter())
ax.yaxis.get_major_formatter().set_scientific(False)
y_values = [-100, -10, -1, -0.1, 0.0, 0.1, 1, 10, 100]
ax.set_yticks(y_values)
# get rid of scientific notation in y axis and
# get rid of '.0' for floats bigger than 1
ax.get_yaxis().set_major_formatter(mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',') if abs(x) >= 1 else x))
# treatment of linear scale
else:
ax.set_yscale(scale)
# final formatting
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15))
ax.grid(True)
ax.set_axisbelow(True)
ax.set_adjustable("datalim")
# Load image and add it to the plot
#file_name = "logo_black.png"
#logo = image.imread(file_name)
#The OffsetBox is a simple container artist.
#The child artists are meant to be drawn at a relative position to its #parent.
#imagebox = OffsetImage(logo, zoom = 0.15)
#Container for the imagebox referring to a specific position *xy*.
#ab = AnnotationBbox(imagebox, (20, -5), frameon = False)
#ax.add_artist(ab)
# frames on all sides
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
yield fig
[docs]@figuregen
def plot_bsm_facs_bounds(fits, bsm_names_to_latex, bsm_names_to_plot_scales):
"""
Generates plots of the bounds for BSM coefficients from various fits.
This function takes a list of fits and generates plots showing the mean, standard deviation,
and confidence levels of the BSM coefficients. The confidence levels are calculated as mean ± 2*std.
It supports both linear and symmetric logarithmic scales for the plots.
Parameters
----------
fits: NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
Dictionary mapping BSM factor names to their LaTeX representations.
bsm_names_to_plot_scales : dict
Dictionary to scale the BSM names for plotting.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for each generated plot.
"""
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.append(bsm_fac_ops)
# Remove repeated operators and reorder
all_ops = reorder_cols({o for fit_ops in all_ops for o in fit_ops})
# store the relevant values
bounds_dict = {}
best_fits_dict ={}
for fit in fits:
bounds = []
best_fits = []
for op in all_ops:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
if bsm_facs_df.get([op]) is not None:
# note that bsm_names_to_plot_scales can be None
if bsm_names_to_plot_scales:
values = bsm_names_to_plot_scales[op]*bsm_facs_df[op]
else:
values = bsm_facs_df[op]
mean = values.mean()
std = values.std()
cl_lower, cl_upper = (mean - 2*std, mean + 2*std)
# best-fit value
best_fits.append(mean)
# append bounds
bounds.append([cl_lower, cl_upper])
else:
# if the operator is not in the fit, add np.nan
best_fits.append(np.nan)
bounds.append([np.nan, np.nan])
bounds_dict[fit.label] = bounds
best_fits_dict[fit.label] = best_fits
# plot parameters
scales = ['linear', 'symlog']
colour_key = ['#66C2A5', '#FC8D62', '#8DA0CB']
for scale in scales:
# initialise plots
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
# line for SM prediction
ax.axhline(y=0.0, color='k', linestyle='--', alpha=0.3, label='SM')
for fit in fits:
bounds = bounds_dict[fit.label]
best_fits = best_fits_dict[fit.label]
x_coords = [i - 0.1 + 0.2*fits.index(fit) for i in range(len(all_ops))]
bounds_min = [bound[0] for bound in bounds]
bounds_max= [bound[1] for bound in bounds]
ax.scatter(x_coords, best_fits, color=colour_key[fits.index(fit)])
ax.vlines(x=x_coords, ymin=bounds_min, ymax=bounds_max, label='95% CL ' + fit.label,
color=colour_key[fits.index(fit)], lw=2.0)
# set x positions for labels and labels
ax.set_xticks(np.arange(len(all_ops)))
bsm_latex_names = []
for op in all_ops:
# note that bsm_names_to_plot_scales can be None
if bsm_names_to_plot_scales:
if bsm_names_to_plot_scales[op] != 1:
bsm_latex_names += [str(bsm_names_to_plot_scales[op]) + '$\cdot$' + bsm_names_to_latex[op]]
else:
bsm_latex_names += [bsm_names_to_latex[op]]
ax.set_xticklabels(bsm_latex_names, rotation='vertical', fontsize=10)
# set y labels
ax.set_ylabel(r'$c_i / \Lambda^2 \ \ [ \operatorname{TeV}^{-2} ] $', fontsize=10)
# treatment of the symmetric log scale
if scale == 'symlog':
ax.set_yscale(scale, linthresh=0.1)
# turn off scientific notation
ax.yaxis.set_major_formatter(mticker.ScalarFormatter())
ax.yaxis.get_major_formatter().set_scientific(False)
y_values = [-100, -10, -1, -0.1, 0.0, 0.1, 1, 10, 100]
ax.set_yticks(y_values)
# get rid of scientific notation in y axis and
# get rid of '.0' for floats bigger than 1
ax.get_yaxis().set_major_formatter(mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',') if abs(x) >= 1 else x))
# treatment of linear scale
else:
ax.set_yscale(scale)
# final formatting
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15))
ax.grid(True)
ax.set_axisbelow(True)
ax.set_adjustable("datalim")
# frames on all sides
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
yield fig
[docs]@figuregen
def plot_bsm_facs_68res(fits, bsm_names_to_latex):
"""
Generates plots of the 68% residuals of BSM coefficients for various fits.
This function processes a set of fits and generates plots showing the 68% residuals of the
BSM (Beyond the Standard Model) coefficients. The residuals are calculated as the mean of
the coefficients divided by their standard deviation, providing a measure of deviation
from the Standard Model.
Parameters
----------
fits: NSList
List of FitSpec to be compared.
bsm_names_to_latex : dict
Dictionary mapping BSM factor names to their LaTeX representations.
Yields
------
fig : matplotlib.figure.Figure
The matplotlib figure object for the residual plot.
"""
# extract all operators in the fits
all_ops = []
for fit in fits:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
bsm_fac_ops = bsm_facs_df.columns.tolist()
all_ops.append(bsm_fac_ops)
# Remove repeated operators and reorder
all_ops = reorder_cols({o for fit_ops in all_ops for o in fit_ops})
# store the relevant values
residuals_dict = {}
for fit in fits:
residuals = []
for op in all_ops:
paths = replica_paths(fit)
bsm_facs_df = read_bsm_facs(paths)
if bsm_facs_df.get([op]) is not None:
values = bsm_facs_df[op]
mean = values.mean()
std = values.std()
# append residual
residuals.append(mean / std)
else:
# if the operator is not in the fit, then assume SM
residuals.append(0.0)
residuals_dict[fit.label] = residuals
# plotting specs
colour_key = ['#66C2A5', '#FC8D62', '#8DA0CB']
# initialise plots
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
# line for 0
ax.axhline(y=0.0, color='k', linestyle='--', alpha=0.8)
# line for +-1 residual
ax.axhline(y=1.0, color='k', linestyle='--', alpha=0.3)
ax.axhline(y=-1.0, color='k', linestyle='--', alpha=0.3)
for fit in fits:
residuals = residuals_dict[fit.label]
ordered_residuals = format_residuals(residuals)
x_coords = [i - 0.1 + 0.2*fits.index(fit) for i in range(len(all_ops))]
residuals_min = [residual[0] for residual in ordered_residuals]
residuals_max = [residual[1] for residual in ordered_residuals]
ax.vlines(x=x_coords, ymin=residuals_min, ymax=residuals_max, label=fit.label,
color=colour_key[fits.index(fit)], lw=4.0)
# set x positions for labels and labels
ax.set_xticks(np.arange(len(all_ops)))
bsm_latex_names = []
for op in all_ops:
bsm_latex_names += [bsm_names_to_latex[op]]
ax.set_xticklabels(bsm_latex_names, rotation='vertical', fontsize=10)
# set y scale
ax.set_yscale('linear')
# set y labels
ax.set_ylabel(r'Residuals (68%)', fontsize=10)
# final formatting
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15))
ax.grid(True)
ax.set_axisbelow(True)
ax.set_adjustable("datalim")
# frames on all sides
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)
yield fig
_read_pdf_cfactors = collect("read_bsm_facs", ("pdffit",))
def read_pdf_cfactors(_read_pdf_cfactors, pdf):
return _read_pdf_cfactors[0]
[docs]def dataset_scaled_fit_cfactor(dataset, pdf, read_pdf_cfactors, quad_cfacs):
"""For each replica of ``pdf``, scale the fit cfactors by
the best fit value.
Returns
-------
res: np.arrays
An ``ndat`` x ``nrep`` array containing the scaled fit cfactors.
"""
parsed_cfacs = parse_fit_cfac(dataset.fit_cfac, dataset.cuts)
if parsed_cfacs is None or not read_pdf_cfactors.values.size:
# We want an array of ones that ndata x nrep
# where ndata is the number of post cut datapoints
ndata = len(dataset.load().get_cv())
nrep = len(pdf) - 1
return np.ones((ndata, nrep))
log.debug("Scaling results using linear cfactors")
fit_cfac_df = pd.DataFrame(
{k: v.central_value.squeeze() for k, v in parsed_cfacs.items()}
)
scaled_replicas = read_pdf_cfactors.values * fit_cfac_df.values[:, np.newaxis]
if quad_cfacs:
log.debug("Scaling results using quadratic cfactors")
parsed_quads = parse_quad_cfacs(dataset.fit_cfac, dataset.cuts, quad_cfacs)
quad_cfac_df = pd.DataFrame(
{k: v.central_value.squeeze() for k, v in parsed_quads.items()}
)
scaled_replicas += (read_pdf_cfactors.values**2) * quad_cfac_df.values[:, np.newaxis]
return 1 + np.sum(scaled_replicas, axis=2)
"""
Principal component analysis
"""
[docs]def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
"""
Truncates a colormap to a specific range.
This function creates a new colormap based on a given colormap but truncated to the range specified
by minval and maxval. This is useful for adjusting the range of colors used in a plot.
Parameters
----------
cmap : matplotlib.colors.Colormap
The original colormap to be truncated.
minval : float, optional
The minimum value of the new colormap, by default 0.0.
maxval : float, optional
The maximum value of the new colormap, by default 1.0.
n : int, optional
The number of discrete colors in the new colormap, by default 100.
Returns
-------
matplotlib.colors.LinearSegmentedColormap
The truncated colormap.
"""
new_cmap = colors.LinearSegmentedColormap.from_list(
'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
cmap(np.linspace(minval, maxval, n)))
return new_cmap
def _compute_fisher_information_matrix(dataset_inputs, theoryid, groups_covmat, simu_parameters_names, pdf):
"""
Computes the Fisher information matrix for a given set of datasets and simulation parameters.
This function calculates the Fisher information matrix, which quantifies the amount of information
that an observable random variable carries about an unknown parameter upon which the probability
of the random variable depends. It takes into account the datasets, theory IDs, groups covariance
matrix, simulation parameters, and the parton distribution function (PDF).
Parameters
----------
dataset_inputs : list of Dataset objects
The datasets used for computing the Fisher information matrix.
theoryid : int or array-like
Theory identifier(s) associated with the datasets.
groups_covmat : pd.DataFrame
Covariance matrices for the groups in the datasets.
simu_parameters_names : list of str
List of names of the simulation parameters.
pdf : PDF object
The parton distribution function object.
Returns
-------
pd.DataFrame
The computed Fisher information matrix as a pandas DataFrame.
"""
bsm_factors = []
if dataset_inputs is not None:
for dataset in dataset_inputs:
ds = l.check_dataset(name=dataset.name, theoryid=theoryid, cfac=dataset.cfac, simu_parameters_names=dataset.simu_parameters_names, simu_parameters_linear_combinations=dataset.simu_parameters_linear_combinations, use_fixed_predictions=dataset.use_fixed_predictions)
bsm_fac = parse_simu_parameters_names_CF(ds.simu_parameters_names_CF, ds.simu_parameters_linear_combinations, cuts=ds.cuts)
central_sm = central_predictions(ds, pdf)
coefficients = central_sm.to_numpy().T * np.array([i.central_value for i in bsm_fac.values()])
bsm_factors += [coefficients]
# Make bsm_factors into a nice numpy array.
bsm_factors = np.concatenate(bsm_factors, axis=1).T
# The rows are the data, the columns are the operator
cov = groups_covmat.to_numpy()
inv_cov = np.linalg.inv(cov)
fisher = bsm_factors.T @ inv_cov @ bsm_factors
fisher = pd.DataFrame(fisher, index=simu_parameters_names)
fisher = fisher.T
fisher.index = simu_parameters_names
return fisher
[docs]@table
def principal_component_values(fisher_information_matrix):
"""
Returns the eigenvalues corresponding to the various principal directions.
This function performs a principal component analysis (PCA) on the Fisher information matrix
to return the eigenvalues. These eigenvalues represent the variance of the data along the
principal components, which can be used to understand the dominant directions in the data.
Parameters
----------
fisher_information_matrix : pd.DataFrame
The Fisher information matrix.
Returns
-------
pd.DataFrame
A DataFrame containing the eigenvalues of the Fisher information matrix.
"""
fisher = fisher_information_matrix.to_numpy()
fisher = fisher - fisher.mean(axis=0)
_, values, _ = np.linalg.svd(fisher)
values = pd.DataFrame(values)
return values
[docs]@table
def principal_component_vectors(fisher_information_matrix, simu_parameters_names):
"""
Performs a principal component analysis to obtain the principal directions (vectors).
This function calculates the principal component vectors from the Fisher information matrix,
providing insights into the flat directions of the parameter space. These directions can
indicate combinations of parameters that are less constrained by the data.
Parameters
----------
fisher_information_matrix : pd.DataFrame
The Fisher information matrix.
simu_parameters_names : list of str
Names of the simulation parameters.
Returns
-------
pd.DataFrame
A DataFrame containing the principal component vectors, indexed by the simulation parameters.
"""
fisher = fisher_information_matrix.to_numpy()
fisher = fisher - fisher.mean(axis=0)
_, _, vectors = np.linalg.svd(fisher)
vectors = pd.DataFrame(vectors, columns=simu_parameters_names)
return vectors