Commit 604bfa40 authored by Marcel Rieger's avatar Marcel Rieger
Browse files

Refactor postfit nuisance likelihood scans.

parent 14a8cfe3
......@@ -21,7 +21,7 @@ from dhi.util import (
)
from dhi.plots.util import (
use_style, create_model_parameters, fill_hist_from_points, get_contours, get_y_range,
infer_binning_from_grid, get_contour_box,
infer_binning_from_grid, get_contour_box, make_parameter_label_map,
)
......@@ -817,6 +817,8 @@ def plot_nuisance_likelihood_scans(
only_parameters=None,
parameters_per_page=1,
sort_max=False,
show_diff=False,
labels=None,
scan_points=101,
x_min=-2.,
x_max=2,
......@@ -827,7 +829,7 @@ def plot_nuisance_likelihood_scans(
campaign=None,
paper=False,
):
"""
r"""
Creates a plot showing the change of the negative log-likelihood, obtained *poi*, when varying
values of nuisance paramaters and saves it at *paths*. The calculation of the likelihood change
requires the RooFit *workspace* to read the model config, a RooDataSet *dataset* to construct
......@@ -838,10 +840,21 @@ def plot_nuisance_likelihood_scans(
Nuisances to skip, or to show exclusively can be configured via *skip_parameters* and
*only_parameters*, respectively, which can be lists of patterns. *parameters_per_page* defines
the number of parameter curves that are drawn in the same canvas page. When *sort_max* is
*True*, the parameter are sorted by their highest likelihood change in the full scan range. The
scan range and granularity is set via *scan_points*, *x_min* and *x_max*. *y_min* and *y_max*
define the range of the y-axis, which is plotted with a logarithmic scale when *y_log* is
*True*. *model_parameters* can be a dictionary of key-value pairs of model parameters.
*True*, the parameter are sorted by their highest likelihood change in the full scan range.
By default, the x-axis shows absolute variations of the nuisance parameters (in terms of the
prefit range). When *show_diff* is *True*, it shows differences with respect to the best fit
value instead.
*labels* should be a dictionary or a json file containing a dictionary that maps nuisances names
to labels shown in the plot, a python file containing a function named "rename_nuisance", or a
function itself. When it is a dictionary and a key starts with "^" and ends with "$" it is
interpreted as a regular expression. Matched groups can be reused in the substituted name via
'\n' to reference the n-th group (following the common re.sub format). When it is a function, it
should accept the current nuisance label as a single argument and return the new value.
The scan range and granularity is set via *scan_points*, *x_min* and *x_max*. *y_min* and
*y_max* define the range of the y-axis, which is plotted with a logarithmic scale when *y_log*
is *True*. *model_parameters* can be a dictionary of key-value pairs of model parameters.
*campaign* should refer to the name of a campaign label defined in *dhi.config.campaign_labels*.
When *paper* is *True*, certain plot configurations are adjusted for use in publications.
......@@ -897,8 +910,9 @@ def plot_nuisance_likelihood_scans(
x_values, y_values = [], []
print("scanning parameter {}".format(name))
for x in scan_values:
param.setVal(param_bf + (pre_u if x >= 0 else -pre_d) * x)
x_values.append(param.getVal())
x_diff = x * (pre_u if x >= 0 else -pre_d)
param.setVal(param_bf + x_diff)
x_values.append(x_diff if show_diff else (param_bf + x_diff))
y_values.append(2 * (nll.getVal() - nll_base))
curve_data[name] = (x_values, y_values)
......@@ -918,6 +932,9 @@ def plot_nuisance_likelihood_scans(
else:
param_groups.append([name])
# prepare labels
labels = make_parameter_label_map(param_names, labels)
# go through nuisances
canvas = None
for names in param_groups:
......@@ -941,7 +958,10 @@ def plot_nuisance_likelihood_scans(
y_max, log=y_log)
# dummy histogram to control axes
x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}"
if show_diff:
x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}"
else:
x_title = "#theta / #Delta#theta_{pre}"
y_title = "Change in -2 log(L)"
h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max)
r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": _y_min, "Maximum": _y_max})
......@@ -967,7 +987,8 @@ def plot_nuisance_likelihood_scans(
g_nll = create_tgraph(len(x), x, y)
r.setup_graph(g_nll, props={"LineWidth": 2, "LineStyle": 1}, color=colors[col])
draw_objs.append((g_nll, "SAME,C"))
legend_entries.append((g_nll, to_root_latex(name), "L"))
label = to_root_latex(labels.get(name, name))
legend_entries.append((g_nll, label, "L"))
# legend
legend_cols = min(int(math.ceil(len(legend_entries) / 4.)), 3)
......
......@@ -6,7 +6,6 @@ Pull and impact plots using ROOT.
import os
import json
import re
import math
import array
......@@ -15,9 +14,9 @@ import numpy as np
from dhi.config import poi_data, campaign_labels, colors, cms_postfix
from dhi.util import (
import_ROOT, import_file, multi_match, to_root_latex, linspace, colored, make_list,
import_ROOT, multi_match, to_root_latex, linspace, colored, make_list,
)
from dhi.plots.util import use_style
from dhi.plots.util import use_style, make_parameter_label_map
colors = colors.root
......@@ -143,42 +142,7 @@ def plot_pulls_impacts(
params = [params[i] for i in indices]
# prepare labels
if isinstance(labels, six.string_types):
labels = os.path.expandvars(os.path.expanduser(labels))
# try to load a renaming function called "rename_nuisance"
if labels.endswith(".py"):
labels = import_file(labels, attr="rename_nuisance")
if not callable(labels):
raise Exception("rename_nuisance loaded from {} is not callable".format(labels))
else:
with open(labels, "r") as f:
labels = json.load(f)
elif not labels:
labels = {}
if not isinstance(labels, dict):
# labels is a renaming function, call it for all parameters and store the result when
# names changed
_labels = {}
for param in params:
new_name = labels(param.name)
if new_name and new_name != param.name:
_labels[param.name] = new_name
labels = _labels
else:
# expand regular expressions through eager interpolation using parameter names
for k, v in labels.items():
if not k.startswith("^") or not k.endswith("$"):
continue
for param in params:
# skip explicit translations, effectively giving them priority
if param.name in labels:
continue
# apply the pattern
new_name = re.sub(k, v, param.name)
# store a translation label when the name has changed
if new_name != param.name:
labels[param.name] = new_name
labels = make_parameter_label_map([param.name for param in params], labels)
# determine the number of pages
if parameters_per_page < 1:
......
......@@ -4,9 +4,12 @@
Different helpers and ROOT style configurations to be used with plotlib.
"""
import os
import re
import math
import array
import uuid
import json
import functools
import itertools
import contextlib
......@@ -17,7 +20,7 @@ import numpy as np
import scipy.interpolate
from dhi.config import poi_data, br_hh_names
from dhi.util import import_ROOT, try_int, to_root_latex, make_list
from dhi.util import import_ROOT, import_file, try_int, to_root_latex, make_list
_styles = {}
......@@ -131,6 +134,48 @@ def determine_limit_digits(limit, is_xsec=False):
return 0
def make_parameter_label_map(parameter_names, labels=None):
# prepare labels
if isinstance(labels, six.string_types):
labels = os.path.expandvars(os.path.expanduser(labels))
# try to load a renaming function called "rename_nuisance"
if labels.endswith(".py"):
labels = import_file(labels, attr="rename_nuisance")
if not callable(labels):
raise Exception("rename_nuisance loaded from {} is not callable".format(labels))
else:
with open(labels, "r") as f:
labels = json.load(f)
elif not labels:
labels = {}
if not isinstance(labels, dict):
# labels is a renaming function, call it for all parameters and store the result when
# names changed
_labels = {}
for name in parameter_names:
new_name = labels(name)
if new_name:
_labels[name] = new_name
labels = _labels
else:
# expand regular expressions through eager interpolation using parameter names
for k, v in labels.items():
if not k.startswith("^") or not k.endswith("$"):
continue
for name in parameter_names:
# skip explicit translations, effectively giving them priority
if name in labels:
continue
# apply the pattern
new_name = re.sub(k, v, name)
# store a translation label when set
if new_name:
labels[name] = new_name
return labels
def get_y_range(y_min_value, y_max_value, y_min=None, y_max=None, log=False, y_min_log=1e-3,
top_margin=0.38, visible_margin=0.4):
if log:
......
......@@ -13,6 +13,7 @@ import luigi
from dhi.tasks.base import HTCondorWorkflow, view_output_plots
from dhi.tasks.combine import CombineCommandTask, POITask, POIPlotTask, CreateWorkspace
from dhi.tasks.snapshot import Snapshot, SnapshotUser
from dhi.tasks.pulls_impacts import PlotPullsAndImpacts
class SAVEFLAGS(str, enum.Enum):
......@@ -313,6 +314,12 @@ class PlotNuisanceLikelihoodScans(PostfitPlotBase):
description="when True, sort parameters by their hightest likelihood change in the scan "
"range; mostly useful when the number of parameters per page is > 1; default: False",
)
show_diff = luigi.BoolParameter(
default=False,
description="when True, the x-axis shows differences of nuisance parameters with respect "
"to the best fit value instead of absolute values; default: False",
)
labels = PlotPullsAndImpacts.labels
mc_stats_patterns = ["*prop_bin*"]
......@@ -323,10 +330,17 @@ class PlotNuisanceLikelihoodScans(PostfitPlotBase):
force_n_pois = 1
def requires(self):
return FitDiagnostics.req(self, skip_save=("WithUncertainties",))
# normally, we would require FitDiagnostics without saved uncertainties no matter what,
# but since it could be already complete, use it when it does exist
full_fd = FitDiagnostics.req(self)
if full_fd.complete():
return full_fd
return FitDiagnostics.req(self, skip_save=("WithUncertainties",), _prefer_cli=["skip_save"])
def output(self):
parts = ["nlls", "{}To{}".format(self.x_min, self.x_max), self.get_output_postfix()]
if self.show_diff:
parts.append("diffs")
if self.y_log:
parts.append("log")
if self.sort_max:
......@@ -375,6 +389,8 @@ class PlotNuisanceLikelihoodScans(PostfitPlotBase):
skip_parameters=skip_parameters,
parameters_per_page=self.parameters_per_page,
sort_max=self.sort_max,
show_diff=self.show_diff,
labels=None if self.labels == law.NO_STR else self.labels,
x_min=self.x_min,
x_max=self.x_max,
y_min=self.get_axis_limit("y_min"),
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment