Commit 66313806 authored by Marcel Rieger's avatar Marcel Rieger

Add PlotNuisanceLikelihoodScans task with docs.

parent b898daf2
......@@ -5,6 +5,7 @@ Likelihood plots using ROOT.
"""
import math
from collections import OrderedDict
import numpy as np
import scipy.interpolate
......@@ -14,7 +15,7 @@ from scinum import Number
from dhi.config import (
poi_data, br_hh_names, campaign_labels, chi2_levels, colors, color_sequence, marker_sequence,
)
from dhi.util import import_ROOT, to_root_latex, create_tgraph, DotDict, minimize_1d
from dhi.util import import_ROOT, to_root_latex, create_tgraph, DotDict, minimize_1d, multi_match
from dhi.plots.util import (
use_style, draw_model_parameters, fill_hist_from_points, create_random_name, get_contours,
)
......@@ -323,7 +324,7 @@ def plot_likelihood_scans_1d(
legend_cols = min(int(math.ceil(len(legend_entries) / 4.)), 3)
legend_rows = int(math.ceil(len(legend_entries) / float(legend_cols)))
legend = r.routines.create_legend(pad=pad, width=legend_cols * 210, n=legend_rows,
props={"NColumns": legend_cols})
props={"NColumns": legend_cols, "TextSize": 18})
r.fill_legend(legend, legend_entries)
draw_objs.append(legend)
legend_box = r.routines.create_legend_box(legend, pad, "trl",
......@@ -653,6 +654,181 @@ def plot_likelihood_scans_2d(
canvas.SaveAs(path)
def plot_nuisance_likelihood_scans(
path,
poi,
workspace,
dataset,
fit_diagnostics_path,
fit_name="fit_s",
skip_parameters=None,
only_parameters=None,
parameters_per_page=1,
scan_points=201,
x_min=-2.,
x_max=2.,
y_log=False,
model_parameters=None,
campaign=None,
):
"""
Creates a plot showing the change of the negative log-likelihood, obtained for *poi*, when varying
values of nuisance parameters and saves it at *path*. The calculation of the likelihood change
requires the RooFit *workspace* to read the model config, a RooDataSet *dataset* to construct
the functional likelihood, and the output file *fit_diagnostics_path* of the combine fit
diagnostics for reading pre- and post-fit parameters for the fit named *fit_name*, defaulting
to ``"fit_s"``.
Nuisances to skip, or to show exclusively, can be configured via *skip_parameters* and
*only_parameters*, respectively, which can be lists of patterns. *parameters_per_page* defines
the number of parameter curves that are drawn on the same canvas page. The scan range and
granularity are set via *scan_points*, *x_min* and *x_max*. When *y_log* is *True*, the y-axis is
plotted with a logarithmic scale. *model_parameters* can be a dictionary of key-value pairs of
model parameters. *campaign* should refer to the name of a campaign label defined in
*dhi.config.campaign_labels*.
Example: https://cms-hh.web.cern.ch/tools/inference/tasks/postfit.html#nuisance-parameter-influence-on-likelihood
"""
import plotlib.root as r
ROOT = import_ROOT()
# helper to convert a RooArgSet into a dictionary mapping parameter names to (value, up error, down error) tuples
def convert_argset(argset):
data = OrderedDict()
it = argset.createIterator()
while True:
param = it.Next()
if not param:
break
data[param.GetName()] = (param.getVal(), param.getErrorHi(), param.getErrorLo())
return data
# get the best fit value and prefit data from the diagnostics file
f = ROOT.TFile(fit_diagnostics_path, "READ")
best_fit = f.Get(fit_name)
fit_args = best_fit.floatParsFinal()
prefit_params = convert_argset(f.Get("nuisances_prefit"))
# get the model config from the workspace
model_config = workspace.genobj("ModelConfig")
# build the nll object
nll_args = ROOT.RooLinkedList()
nll_args.Add(ROOT.RooFit.Constrain(model_config.GetNuisanceParameters()))
nll_args.Add(ROOT.RooFit.Extended(model_config.GetPdf().canBeExtended()))
nll = model_config.GetPdf().createNLL(dataset, nll_args)
# save the best fit in a snapshot
snapshot_name = "best_fit_parameters"
workspace.saveSnapshot(snapshot_name, ROOT.RooArgSet(fit_args), True)
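# the snapshot is reloaded before each parameter scan below to reset all parameters to their best fit values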
# prepare parameters to plot, stored in groups
param_names = [[]]
for param_name in prefit_params:
if only_parameters and not multi_match(param_name, only_parameters):
continue
if skip_parameters and multi_match(param_name, skip_parameters):
continue
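# a value below 1 for parameters_per_page puts all parameters on a single page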
if parameters_per_page < 1 or len(param_names[-1]) < parameters_per_page:
param_names[-1].append(param_name)
else:
param_names.append([param_name])
# prepare the scan values, ensure that 0 is contained
scan_values = np.linspace(x_min, x_max, scan_points).tolist()
if 0 not in scan_values:
scan_values = sorted(scan_values + [0.])
# go through nuisances
canvas = None
for _param_names in param_names:
# setup the default style and create canvas and pad
first_canvas = canvas is None
r.setup_style()
canvas, (pad,) = r.routines.create_canvas(pad_props={"Logy": y_log})
pad.cd()
# start the multi pdf file
if first_canvas:
canvas.Print(path + "[")
# get nll curves for all parameters on this page
curve_data = []
for param_name in _param_names:
pre_u, pre_d = prefit_params[param_name][1:3]
workspace.loadSnapshot(snapshot_name)
param = workspace.var(param_name)
if not param:
raise Exception("parameter {} not found in workspace".format(param_name))
param_bf = param.getVal()
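# reference nll value with all parameters at their best fit values, used to compute 2 * deltaNLL below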
nll_base = nll.getVal()
x_values, y_values = [], []
for x in scan_values:
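# interpret x as a shift in prefit standard deviations, using the up or down error depending on its sign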
param.setVal(param_bf + (pre_u if x >= 0 else -pre_d) * x)
x_values.append(param.getVal())
y_values.append(2 * (nll.getVal() - nll_base))
curve_data.append((param_name, x_values, y_values))
# get y range
y_min_value = min(min(y_values) for _, _, y_values in curve_data)
y_max_value = max(max(y_values) for _, _, y_values in curve_data)
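# leave roughly 35% of headroom above the highest curve, in log scale when y_log is set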
if y_log:
y_min = 1.e-3
y_max = y_min * 10**(1.35 * math.log10(y_max_value / y_min))
else:
y_min = y_min_value
y_max = 1.35 * (y_max_value - y_min)
# dummy histogram to control axes
x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}"
y_title = "Change in -2 log(L)"
h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max)
r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": y_min, "Maximum": y_max})
draw_objs = [(h_dummy, "HIST")]
legend_entries = []
# nll graphs
for (param_name, x, y), col in zip(curve_data, color_sequence[:len(curve_data)]):
g_nll = create_tgraph(len(x), x, y)
r.setup_graph(g_nll, props={"LineWidth": 2, "LineStyle": 1}, color=colors[col])
draw_objs.append((g_nll, "SAME,C"))
legend_entries.append((g_nll, to_root_latex(param_name), "L"))
# legend
legend_cols = min(int(math.ceil(len(legend_entries) / 4.)), 3)
legend_rows = int(math.ceil(len(legend_entries) / float(legend_cols)))
legend = r.routines.create_legend(pad=pad, width=legend_cols * 210, n=legend_rows,
props={"NColumns": legend_cols, "TextSize": 18})
r.fill_legend(legend, legend_entries)
draw_objs.append(legend)
legend_box = r.routines.create_legend_box(legend, pad, "trl",
props={"LineWidth": 0, "FillColor": colors.white_trans_70})
draw_objs.insert(-1, legend_box)
# model parameter labels
if model_parameters:
draw_objs.extend(draw_model_parameters(model_parameters, pad))
# cms label
cms_labels = r.routines.create_cms_labels(pad=pad)
draw_objs.extend(cms_labels)
# campaign label
if campaign:
campaign_label = to_root_latex(campaign_labels.get(campaign, campaign))
campaign_label = r.routines.create_top_right_label(campaign_label, pad=pad)
draw_objs.append(campaign_label)
# draw objects, update and save
r.routines.draw_objects(draw_objs)
r.update_canvas(canvas)
canvas.Print(path)
# finish the pdf
if canvas:
canvas.Print(path + "]")
def evaluate_likelihood_scan_1d(poi_values, dnll2_values, poi_min=None):
"""
Takes the results of a 1D likelihood profiling scan given by the *poi_values* and the
......
......@@ -187,7 +187,7 @@ def plot_limit_scan(
else:
g_thy = create_graph(values=theory_values, key="xsec")
r.setup_graph(g_thy, props={"LineWidth": 2, "LineStyle": 1, "LineColor": colors.red})
draw_objs.append((g_thy, "SAME,L"))
draw_objs.append((g_thy, "SAME,C"))
legend_entries[0 if observed_values is None else 1] = (g_thy, "Theory prediction", "L")
# legend
......@@ -364,7 +364,7 @@ def plot_limit_scans(
legend_cols = min(int(math.ceil(len(legend_entries) / 4.)), 3)
legend_rows = int(math.ceil(len(legend_entries) / float(legend_cols)))
legend = r.routines.create_legend(pad=pad, width=legend_cols * 210, n=legend_rows,
props={"NColumns": legend_cols})
props={"NColumns": legend_cols, "TextSize": 18})
r.fill_legend(legend, legend_entries)
draw_objs.append(legend)
legend_box = r.routines.create_legend_box(legend, pad, "trl",
......
......@@ -42,7 +42,7 @@ def plot_s_over_b(
parameters. *campaign* should refer to the name of a campaign label defined in
*dhi.config.campaign_labels*.
Example: https://cms-hh.web.cern.ch/tools/inference/tasks/postfit.html
Example: https://cms-hh.web.cern.ch/tools/inference/tasks/postfit.html#combined-postfit-shapes
"""
import plotlib.root as r
ROOT = import_ROOT()
......
......@@ -26,6 +26,7 @@ def plot_pulls_impacts(
poi=None,
parameters_per_page=-1,
selected_page=-1,
only_parameters=None,
skip_parameters=None,
order_parameters=None,
order_by_impact=False,
......@@ -45,11 +46,11 @@ def plot_pulls_impacts(
*parameters_per_page* configures how many parameters are shown per plot page. When negative, all
parameters are shown in the same plot. This feature is only supported for pdfs. When
*selected_page* is non-negative, only this page is created. *skip_parameters* can be a list of
name patterns or files containing name patterns line-by-line to exclude parameters from
plotting. *order_parameters* accepts the same type of values, except they are used to order
parameters. When *order_by_impact* is *True*, *order_parameters* is neglected and the order is
derived using the largest absolute impact.
*selected_page* is non-negative, only this page is created. *only_parameters*
(*skip_parameters*) can be a list of name patterns or files containing name patterns
line-by-line to include (exclude) parameters in the plot. *order_parameters* accepts the same
type of values, except they are used to order parameters. When *order_by_impact* is *True*,
*order_parameters* is neglected and the order is derived using the largest absolute impact.
The symmetric range of pulls on the bottom x-axis is defined by *pull_range* (an integer)
whereas the range of the top x-axis associated to impact values is set by *impact_range*. For
......@@ -95,9 +96,13 @@ def plot_pulls_impacts(
print("{} total parameters found in data".format(len(params)))
# apply filtering
if only_parameters:
patterns = read_patterns(only_parameters)
params = [param for param in params if multi_match(param.name, patterns, mode=any)]
if skip_parameters:
patterns = read_patterns(skip_parameters)
params = [param for param in params if not multi_match(param.name, patterns, mode=any)]
if only_parameters or skip_parameters:
print("{} remaining parameters after filtering".format(len(params)))
# apply ordering
......
......@@ -274,7 +274,7 @@ def plot_significance_scans(
legend_cols = int(math.ceil(len(legend_entries) / 4.))
legend_rows = min(len(legend_entries), 4)
legend = r.routines.create_legend(pad=pad, y2=-20, width=legend_cols * 160, n=legend_rows,
props={"NColumns": legend_cols})
props={"NColumns": legend_cols, "TextSize": 18})
r.setup_legend(legend)
r.fill_legend(legend, legend_entries)
draw_objs.append(legend)
......
......@@ -4,6 +4,8 @@
Tasks for working with postfit results.
"""
import copy
import law
import luigi
......@@ -37,7 +39,11 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor
return CreateWorkspace.req(self)
def output(self):
return self.local_target("fitdiagnostics__{}.root".format(self.get_output_postfix()))
postfix = self.get_output_postfix()
return {
"result": self.local_target("result__{}.root".format(postfix)),
"diagnostics": self.local_target("fitdiagnostics__{}.root".format(postfix)),
}
@property
def blinded_args(self):
......@@ -47,6 +53,7 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor
return "--toys {self.toys}".format(self=self)
def build_command(self):
outputs = self.output()
return (
"combine -M FitDiagnostics {workspace}"
" --verbose 1"
......@@ -61,14 +68,18 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor
" --saveWithUncertainties"
" --saveNormalizations"
" --saveWorkspace"
" --saveToys"
" {self.combine_optimization_args}"
" {self.custom_args}"
" && "
"mv fitDiagnostics.root {output}"
"mv higgsCombineTest.FitDiagnostics.mH{self.mass_int}.123456.root {output_result}"
" && "
"mv fitDiagnosticsTest.root {output_diagnostics}"
).format(
self=self,
workspace=self.input().path,
output=self.output().path,
output_result=outputs["result"].path,
output_diagnostics=outputs["diagnostics"].path,
)
......@@ -121,7 +132,7 @@ class PlotPostfitSOverB(POIPlotTask):
output.parent.touch()
# get the path to the fit diagnostics file
fit_diagnostics_path = self.input()["collection"][0].path
fit_diagnostics_path = self.input()["collection"][0]["diagnostics"].path
# call the plot function
self.call_plot_func(
......@@ -138,3 +149,88 @@ class PlotPostfitSOverB(POIPlotTask):
model_parameters=self.get_shown_parameters(),
campaign=self.campaign if self.campaign != law.NO_STR else None,
)
class PlotNuisanceLikelihoodScans(POIPlotTask):
x_min = copy.copy(POIPlotTask.x_min)
x_max = copy.copy(POIPlotTask.x_max)
x_min._default = -2.
x_max._default = 2.
y_log = luigi.BoolParameter(
default=False,
description="apply log scaling to the y-axis; default: False",
)
only_parameters = law.CSVParameter(
default=(),
significant=False,
description="patterns of parameters to include; skips all others; no default",
)
skip_parameters = law.CSVParameter(
default=(),
significant=False,
description="patterns of parameters to skip; no default",
)
parameters_per_page = luigi.IntParameter(
default=1,
description="number of parameters per page; creates a single page when < 1; default: 1",
)
file_type = "pdf"
y_min = None
y_max = None
z_min = None
z_max = None
force_n_pois = 1
def requires(self):
return FitDiagnostics.req(self)
def output(self):
parts = ["nlls", "{}To{}".format(self.x_min, self.x_max), self.get_output_postfix()]
if self.y_log:
parts.append("log")
return self.local_target(self.create_plot_name(parts))
@law.decorator.log
@law.decorator.notify
@view_output_plots
@law.decorator.safe_output
def run(self):
# prepare the output
output = self.output()
output.parent.touch()
# get input targets
inputs = self.input()
fit_result = inputs["collection"][0]["result"]
fit_diagnostics = inputs["collection"][0]["diagnostics"]
# open the result file to load the workspace and other objects
with fit_result.load("READ", formatter="root") as result_file:
# get workspace
w = result_file.Get("w")
# load the dataset
dataset = w.data("data_obs") if self.unblinded else result_file.Get("toys/toy_asimov")
# call the plot function
self.call_plot_func(
"dhi.plots.likelihoods.plot_nuisance_likelihood_scans",
path=output.path,
poi=self.pois[0],
workspace=w,
dataset=dataset,
fit_diagnostics_path=fit_diagnostics.path,
fit_name="fit_s",
only_parameters=self.only_parameters,
skip_parameters=self.skip_parameters,
parameters_per_page=self.parameters_per_page,
x_min=self.x_min,
x_max=self.x_max,
y_log=self.y_log,
model_parameters=self.get_shown_parameters(),
campaign=self.campaign if self.campaign != law.NO_STR else None,
)
......@@ -17,6 +17,11 @@ from dhi.datacard_tools import get_workspace_parameters
class PullsAndImpactsBase(POITask):
only_parameters = law.CSVParameter(
default=(),
description="comma-separated parameter names to include; supports patterns; skips all "
"others; no default",
)
skip_parameters = law.CSVParameter(
default=(),
description="comma-separated parameter names to be skipped; supports patterns; "
......@@ -62,6 +67,8 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow
params = [p for p in params if not is_mc_stats(p)]
# skip
if self.only_parameters:
params = [p for p in params if law.util.multi_match(p, self.only_parameters)]
if self.skip_parameters:
params = [p for p in params if not law.util.multi_match(p, self.skip_parameters)]
......@@ -139,6 +146,8 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow
if self.mc_stats:
postfix += "_mcstats"
if self.only_parameters:
postfix += "_only" + law.util.create_hash(sorted(self.only_parameters))
if self.skip_parameters:
postfix += "_skip" + law.util.create_hash(sorted(self.skip_parameters))
......@@ -155,6 +164,8 @@ class MergePullsAndImpacts(PullsAndImpactsBase):
parts = []
if self.mc_stats:
parts.append("mcstats")
if self.only_parameters:
parts.append("only_" + law.util.create_hash(sorted(self.only_parameters)))
if self.skip_parameters:
parts.append("skip_" + law.util.create_hash(sorted(self.skip_parameters)))
......@@ -305,6 +316,8 @@ class PlotPullsAndImpacts(PullsAndImpactsBase, POIPlotTask):
parts = []
if self.mc_stats:
parts.append("mcstats")
if self.only_parameters:
parts.append("only_" + law.util.create_hash(sorted(self.only_parameters)))
if self.skip_parameters:
parts.append("skip_" + law.util.create_hash(sorted(self.skip_parameters)))
if self.page >= 0:
......@@ -332,6 +345,7 @@ class PlotPullsAndImpacts(PullsAndImpactsBase, POIPlotTask):
data=data,
parameters_per_page=self.parameters_per_page,
selected_page=self.page,
only_parameters=self.only_parameters,
skip_parameters=self.skip_parameters,
order_parameters=self.order_parameters,
order_by_impact=self.order_by_impact,
......
......@@ -23,7 +23,7 @@ from dhi.tasks.likelihoods import (
from dhi.tasks.significances import PlotSignificanceScan, PlotMultipleSignificanceScans
from dhi.tasks.pulls_impacts import PlotPullsAndImpacts
from dhi.tasks.exclusion import PlotExclusionAndBestFit, PlotExclusionAndBestFit2D
from dhi.tasks.postfit import PlotPostfitSOverB
from dhi.tasks.postfit import PlotPostfitSOverB, PlotNuisanceLikelihoodScans
from dhi.tasks.gof import PlotGoodnessOfFit, PlotMultipleGoodnessOfFits
from dhi.tasks.studies.model_selection import (
PlotMorphingScales, PlotMorphedDiscriminant, PlotStatErrorScan,
......@@ -64,6 +64,7 @@ class TestPlots(six.with_metaclass(TestRegister, AnalysisTask)):
"exclusion_and_bestfit",
"exclusion_and_bestfit_2d",
"postfit_s_over_b",
"nuisance_likelihood_scans",
"goodness_of_fit",
"multiple_goodness_of_fits",
"morphing_scales",
......@@ -226,6 +227,15 @@ class TestPlots(six.with_metaclass(TestRegister, AnalysisTask)):
show_parameters=("kl", "kt"),
)
if self.check_enabled("nuisance_likelihood_scans"):
reqs["nuisance_likelihood_scans"] = PlotNuisanceLikelihoodScans.req(self,
datacards=ggf_cards,
pois=("r",),
parameters_per_page=6,
y_log=True,
show_parameters=("kl", "kt"),
)
if self.check_enabled("goodness_of_fit"):
reqs["goodness_of_fit"] = PlotGoodnessOfFit.req(self,
datacards=ggf_cards,
......
......@@ -114,7 +114,7 @@ More detailed information on the different scans & fits can be found in the foll
</div>
</a>
<a href="tasks/postfit.html">
<a href="tasks/postfit.html#combined-postfit-shapes">
<div class="dhi_image_box">
<div>
<img src="images/postfitsoverb__poi_r__params_r_qqhh1.0_r_gghh1.0_kl1.0_kt1.0_CV1.0_C2V1.0.png" />
......@@ -169,6 +169,17 @@ More detailed information on the different scans & fits can be found in the foll
</div>
</a>
<a href="tasks/postfit.html#nuisance-parameter-influence-on-likelihood">
<div class="dhi_image_box">
<div>
<img src="images/nlls__-2.0To2.0__poi_r__params_r_qqhh1.0_r_gghh1.0_kl1.0_kt1.0_CV1.0_C2V1.0__log.png" />
</div>
<div>
Nuisance influence on likelihood
</div>
</div>
</a>
<div style="clear: both;"></div>
</div>
......
......@@ -2,6 +2,6 @@ The `MergePullsAndImpacts` task collects the fit results from the `PullsAndImpac
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,48,17,18,28,33"
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,48,17,18,56,28,33"
</div>
......@@ -53,3 +53,4 @@
| `--postfit-toys` | Create frequentist postfit toys, which depend on observed data. Defaults to `False`. |
| `--scan-parameters` | Colon-separated parameters to scan, each in the format `name[,start,stop[,points]]`. When the same scan parameter name is used multiple times, the corresponding scan ranges are joined and the order of first occurrence is preserved. Defaults to `kl,-30,30,61`. |
| `--best-fit` | When `True`, the POIs best fit value is computed via likelihood profiling and shown as well. Defaults to `True`. |
| `--only-parameters` | Comma-separated names of parameters to include, with pattern support. All others are skipped. No default. |
The `PlotNuisanceLikelihoodScans` task collects results from the `FitDiagnostics` task and visualizes the change of the negative log-likelihood when varying nuisance parameter values.
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,48,14,12,17,18,56,28,27,3,5,6,9"
</div>
......@@ -2,6 +2,6 @@ The `PlotPullsAndImpacts` task collects fit results from `MergePullsAndImpacts`
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,48,17,18,28,29,33,27,52,30,31,32,3,4"
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,48,17,18,56,28,29,33,27,52,30,31,32,3,4"
</div>
......@@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,48,17,18,28,33,13,34,36"
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,48,17,18,56,28,33,13,34,36"
</div>
### Combined postfit shapes
The `PlotPostfitSOverB` task reads prefit and postfit shapes from combine's `FitDiagnostics` output, orders their bins by their prefit S-over-B ratio, merges them in a configurable way, and shows the bin contents separately for background-only, the fitted signal and recorded data.
- [Quick example](#quick-example)
......@@ -65,3 +67,77 @@ law run PlotPostfitSOverB \
```
Note the quotes around the bin edges: they are required since the first value starts with a dash ("-"), which would otherwise be misinterpreted by the `ArgumentParser` in Python 2.
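The ordering and merging of bins can be pictured with a small numpy sketch. The yields and log10(S/B) range edges below are invented for illustration only; this is not the task's actual implementation, which works on the prefit shapes stored in the `FitDiagnostics` output.

```python
import numpy as np

# hypothetical prefit signal and background yields per bin, concatenated over all channels
s_prefit = np.array([0.10, 2.0, 0.5, 8.0, 0.05])
b_prefit = np.array([50.0, 10.0, 20.0, 4.0, 80.0])

# rank bins by their prefit S/B ratio on a log scale; grouping bins into increasing
# log10(S/B) ranges is equivalent to ordering them and merging neighbouring bins
log_sb = np.log10(s_prefit / b_prefit)

# illustrative log10(S/B) range edges (the task takes such edges as a parameter)
edges = np.array([-4.0, -2.0, -1.0, 1.0])
groups = np.digitize(log_sb, edges) - 1

for i in range(len(edges) - 1):
    mask = groups == i
    print("range {}: S = {:.2f}, B = {:.2f}".format(i, s_prefit[mask].sum(), b_prefit[mask].sum()))
```

The task then shows the contents of the merged bins separately for background-only, the fitted signal and recorded data.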
### Nuisance parameter influence on likelihood
The `PlotNuisanceLikelihoodScans` task reads the outputs of combine's `FitDiagnostics` and illustrates the change of the postfit negative log-likelihood when varying the value of a particular nuisance parameter.
This is especially useful for debugging nuisances and for inspecting their influence on the likelihood during profiling, which should be smooth.
All plots are stored in the same pdf.
There is also the option to draw the curves of multiple parameters in the same plot for better comparison.
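For readers who want to reproduce the underlying numbers interactively, the following PyROOT sketch mirrors the scan logic under a few stated assumptions: the file names are placeholders for the actual `FitDiagnostics` outputs, the nuisance name `CMS_btag_heavy` is hypothetical, and the scan step assumes a unit-width constraint instead of the asymmetric prefit errors used by the task.

```python
import ROOT

# placeholder paths, replace with the actual FitDiagnostics outputs of your datacards / version
result_file = ROOT.TFile.Open("higgsCombineTest.FitDiagnostics.mH125.123456.root", "READ")
diag_file = ROOT.TFile.Open("fitdiagnostics.root", "READ")

# load the workspace, model config and dataset, and build the nll once
w = result_file.Get("w")
model_config = w.genobj("ModelConfig")
dataset = w.data("data_obs")  # or result_file.Get("toys/toy_asimov") when blinded
nll = model_config.GetPdf().createNLL(dataset)

# store the best fit parameters from the diagnostics file in a snapshot
fit_s = diag_file.Get("fit_s")
w.saveSnapshot("best_fit", ROOT.RooArgSet(fit_s.floatParsFinal()), True)

# scan a single (hypothetical) nuisance parameter around its best fit value
w.loadSnapshot("best_fit")
param = w.var("CMS_btag_heavy")
best, nll_base = param.getVal(), nll.getVal()
for x in [-2.0, -1.0, 0.0, 1.0, 2.0]:
    param.setVal(best + x)  # x prefit standard deviations, assuming a unit-width constraint
    print(x, 2 * (nll.getVal() - nll_base))
```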