Skip to content
Snippets Groups Projects

add plot_likelihood_scan_1d, fix the best fit issue

Open Yihui Lai requested to merge yilai/inference:sync into master
2 files
+ 425
2
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 424
1
@@ -34,7 +34,7 @@ colors = colors.root
@use_style("dhi_default")
def plot_likelihood_scans_1d(
def plot_likelihood_scan_1d(
paths,
poi,
data,
@@ -472,6 +472,429 @@ def plot_likelihood_scans_1d(
if hep_data_path:
hdt.save_hep_data(hep_data, hep_data_path)
@use_style("dhi_default")
def plot_likelihood_scans_1d(
    paths,
    poi,
    data,
    theory_value=None,
    ranges_path=None,
    hep_data_path=None,
    show_best_fit=False,
    show_best_fit_error=True,
    show_best_fit_line=None,
    show_best_fit_indicators=None,
    show_significances=(1, 2, 3, 5),
    shift_negative_values=False,
    interpolate_above=None,
    v_lines=None,
    x_min=None,
    x_max=None,
    y_min=None,
    y_max=None,
    y_log=False,
    model_parameters=None,
    campaign=None,
    show_points=True,
    cms_postfix=None,
    style=None,
):
    """
    Plots multiple curves of 1D likelihood scans of a POI *poi*, and saves it at *paths*.
    All information should be passed as a list *data*. Entries must be dictionaries with the
    following content:

    - "values": A mapping to lists of values or a record array with keys "<poi_name>" and "dnll2".
    - "poi_mins": A float (or one-element list) describing the best fit value of the POI. When not
      set, the minimum is estimated from the interpolated curve.
    - "name": A name of the data to be shown in the legend.

    *theory_value* can be a 3-tuple denoting the nominal theory prediction of the POI and its up and
    down uncertainties which is drawn as a vertical bar. When *ranges_path* is set, one and two
    sigma intervals of the scan parameter are saved to the given file. When *hep_data_path* is set,
    a yml data file compatible with the HEPData format
    (https://hepdata-submission.readthedocs.io/en/latest/data_yaml.html) is stored at that path.

    When *show_best_fit* (*show_best_fit_error*) is *True*, the best fit value (and its
    uncertainty) is shown in the corresponding legend entry. When *show_best_fit_line* is *True*, a
    vertical line is shown at the position of the best fit value. When *show_best_fit_indicators* is
    *True* and only a single scan is shown, vertical indicators of the one and two sigma intervals
    of the best fit value, when requested in *show_significances*, are shown. The two latter
    arguments default to the value of *show_best_fit*.

    To overlay lines and labels denoting integer significances corresponding to 1D likelihood scans,
    *show_significances* can be set to *True* to show significances up to 9 sigma, or a list of
    sigmas (integer, >= 1) or confidence levels (float, < 1). In case there are negative dnll2
    values, *shift_negative_values* can be set to *True* to shift them vertically so that the
    minimum is located at 0 again. When *interpolate_above* is defined, values that exceed this
    threshold are removed and interpolated using adjacent values instead.

    *v_lines* can be a list of x-values at which vertical, dashed lines are drawn for visual
    guidance. *x_min* and *x_max* define the x-axis range of POI, and *y_min* and *y_max* control
    the range of the y-axis. When *y_log* is *True*, the y-axis is plotted with a logarithmic scale.
    *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign*
    should refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When
    *show_points* is *True*, the central scan points are drawn on top of the interpolated curve.
    *cms_postfix* is shown as the postfix behind the CMS label.

    Supported values for *style*:

    - "paper"

    Example: https://cms-hh.web.cern.ch/tools/inference/tasks/likelihood.html#1d_1
    """
    import plotlib.root as r
    ROOT = import_ROOT()

    # style-based adjustments
    style = Style.new(style)
    if style.matches("paper"):
        cms_postfix = None

    # input checks and transformations
    if theory_value is not None:
        theory_value = make_list(theory_value)
    if not show_best_fit:
        show_best_fit_error = False
    if show_best_fit_line is None:
        show_best_fit_line = show_best_fit
    if show_best_fit_indicators is None:
        show_best_fit_indicators = show_best_fit

    # validate data entries
    for i, d in enumerate(data):
        # convert likelihood values to arrays
        assert "values" in d
        values = d["values"]
        if isinstance(values, np.ndarray):
            values = {k: values[k] for k in values.dtype.names}
        assert poi in values
        assert "dnll2" in values
        # check poi minimum
        d.setdefault("poi_mins", None)
        # remember whether a minimum was provided at all, before it is wrapped into a list
        # below; checking after the wrapping (as done previously) was always True and broke
        # the internal best-fit estimation
        poi_min_external = d["poi_mins"] is not None
        if not isinstance(d["poi_mins"], (list, tuple)):
            d["poi_mins"] = [d["poi_mins"]]
        # default name
        d.setdefault("name", str(i + 1))
        # origin (for printouts)
        d["origin"] = None if not d["name"] else "entry '{}'".format(d["name"])
        # drop all fields except for required ones
        values = {
            k: np.array(v, dtype=np.float32)
            for k, v in values.items()
            if k in [poi, "dnll2"]
        }
        # preprocess values (nan detection, negative shift)
        values["dnll2"], values[poi] = _preprocess_values(
            values["dnll2"],
            (poi, values[poi]),
            remove_nans=True,
            remove_above=interpolate_above,
            shift_negative_values=shift_negative_values,
            origin=d["origin"],
            min_is_external=poi_min_external,
        )
        d["values"] = values

    # prepare hep data
    hep_data = None
    if hep_data_path:
        hep_data = hdt.create_hist_data()

    # perform scans
    scans = [
        evaluate_likelihood_scan_1d(
            d["values"][poi],
            d["values"]["dnll2"],
            poi_min=d["poi_mins"][0],
            origin=d["origin"],
        )
        for d in data
    ]

    # set x range
    if x_min is None:
        x_min = min([min(d["values"][poi]) for d in data])
    if x_max is None:
        x_max = max([max(d["values"][poi]) for d in data])

    # set y range, restricted to values inside the visible x range
    y_max_value = max([
        d["values"]["dnll2"][(d["values"][poi] >= x_min) & (d["values"][poi] <= x_max)].max()
        for d in data
    ])
    y_min_value = min([
        d["values"]["dnll2"][(d["values"][poi] >= x_min) & (d["values"][poi] <= x_max)].min()
        for d in data
    ])
    y_min, y_max, y_max_line = get_y_range(y_min_value, y_max_value, y_min, y_max, log=y_log)

    # start plotting
    r.setup_style()
    canvas, (pad,) = r.routines.create_canvas(pad_props={"Logy": y_log})
    pad.cd()
    draw_objs = []
    legend_entries = []

    # dummy histogram to control axes
    x_title = to_root_latex(poi_data[poi].label)
    if "unit" in poi_data[poi]:
        x_title = "{} ({})".format(x_title, to_root_latex(poi_data[poi].unit))
    y_title = "-2 #Delta log(L)"
    h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max)
    r.setup_hist(
        h_dummy,
        pad=pad,
        props={"LineWidth": 0, "Minimum": y_min, "Maximum": y_max},
    )
    draw_objs.append((h_dummy, "HIST"))

    if show_significances:
        # horizontal significance lines and labels
        if isinstance(show_significances, (list, tuple)):
            sigs = list(show_significances)
        else:
            sigs = list(range(1, 9 + 1))
        for sig in sigs:
            # get the dnll2 value and build the label
            is_cl = isinstance(sig, float) and sig < 1
            if is_cl:
                # convert confidence level to dnll2 value
                dnll2 = get_chi2_level_from_cl(sig, 1)
            else:
                # convert significance to dnll2 value
                sig = int(round_scientific(sig))
                dnll2 = get_chi2_level(sig, 1)

            # do not show when vertically out of range
            if dnll2 >= y_max_line:
                continue

            # vertical indicators at 1 and 2 sigma when only one curve is shown
            if show_best_fit_indicators and len(data) == 1 and scans[0] and sig in [1, 2]:
                # "p"/"m" select the plus/minus interval borders of the scan
                values = list(map(lambda s: getattr(scans[0], "poi_{}{}".format(s, sig)), "pm"))
                for value in values:
                    if value is None or not (x_min < value < x_max):
                        continue
                    line = ROOT.TLine(value, y_min, value, scans[0].interp(value))
                    r.setup_line(
                        line,
                        props={"LineColor": colors.black, "LineStyle": 2, "NDC": False},
                    )
                    draw_objs.append(line)

            # create the line
            sig_line = ROOT.TLine(x_min, dnll2, x_max, dnll2)
            r.setup_line(
                sig_line,
                props={"NDC": False, "LineWidth": 1},
                color=colors.grey,
            )
            draw_objs.append(sig_line)

            # create and position the label
            if y_log:
                sig_label_y = math.log(dnll2 / y_min) / math.log(y_max / y_min)
            else:
                sig_label_y = dnll2 / (y_max - y_min)
            sig_label_y *= 1.0 - pad.GetTopMargin() - pad.GetBottomMargin()
            sig_label_y += pad.GetBottomMargin() + 0.00375
            if is_cl:
                sig_label = "{:f}".format(sig * 100).rstrip("0").rstrip(".") + "%"
            else:
                sig_label = "{}#sigma".format(sig)
            sig_label = r.routines.create_top_right_label(
                sig_label,
                pad=pad,
                x_offset=12,
                y=sig_label_y,
                props={"TextSize": 18, "TextColor": colors.grey, "TextAlign": 31},
            )
            draw_objs.append(sig_label)

    # draw vertical lines at requested values
    if v_lines:
        for x in v_lines:
            if x_min < x < x_max:
                line = ROOT.TLine(x, y_min, x, y_max_line)
                r.setup_line(
                    line,
                    props={"LineColor": colors.black, "LineStyle": 2, "NDC": False},
                )
                draw_objs.append(line)

    # special case regarding color handling: when all entry names are valid keys in br_hh_colors,
    # replace the default color sequence to deterministically assign same colors to channels
    _color_sequence = color_sequence
    if all(d["name"] in br_hh_colors.root for d in data):
        _color_sequence = [br_hh_colors.root[d["name"]] for d in data]

    # perform scans and draw nll curves
    parameter_ranges = OrderedDict()
    g_nlls = []
    for d, scan, col, ms in zip(data, scans, _color_sequence, marker_sequence):
        if not scan:
            warn("1D likelihood evaluation failed for entry '{}'".format(d["name"]))

        # draw the curve
        g_nll = create_tgraph(
            len(d["values"][poi]),
            d["values"][poi],
            d["values"]["dnll2"],
        )
        r.setup_graph(
            g_nll,
            props={"LineWidth": 2, "MarkerStyle": ms, "MarkerSize": 1.2},
            color=colors[col],
        )
        draw_objs.append((g_nll, "SAME,CP" if show_points else "SAME,C"))
        g_nlls.append(g_nll)

        # legend entry with optional best fit value
        g_label = expand_hh_channel_label(d["name"])
        if scan and show_best_fit:
            if show_best_fit_error:
                bf_label = scan.num_min.str(
                    format="%.2f",
                    style="root",
                    force_asymmetric=True,
                    styles={"space": ""},
                )
            else:
                bf_label = "{:.2f}".format(scan.num_min())
            if g_label:
                g_label += ", {}".format(bf_label)
            else:
                g_label = "{} = {}".format(to_root_latex(poi_data[poi].label), bf_label)
        legend_entries.append((g_nll, g_label, "LP" if show_points else "L"))

        # vertical line denoting the best fit value
        if show_best_fit_line and scan and (x_min <= scan.poi_min <= x_max):
            line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line)
            r.setup_line(
                line_fit,
                props={"LineWidth": 2, "NDC": False},
                color=colors[col],
            )
            draw_objs.append(line_fit)

        # store parameter ranges
        key = poi
        if d["name"]:
            key += "__{}".format(d["name"])
        parameter_ranges[key] = scan["summary"] if scan else None

    # theory prediction with uncertainties
    if theory_value:
        has_thy_err = len(theory_value) == 3
        if has_thy_err:
            # theory graph
            g_thy = create_tgraph(
                1,
                theory_value[0],
                y_min,
                theory_value[2],
                theory_value[1],
                0,
                y_max_line - y_min,
            )
            r.setup_graph(
                g_thy,
                props={"LineColor": colors.red, "FillStyle": 1001, "FillColor": colors.red_trans_50},
            )
            draw_objs.append((g_thy, "SAME,02"))
            legend_entries.append((g_thy, "Standard Model", "LF"))
        # theory line
        line_thy = ROOT.TLine(theory_value[0], y_min, theory_value[0], y_max_line)
        r.setup_line(line_thy, props={"NDC": False}, color=colors.red)
        draw_objs.append(line_thy)
        if not has_thy_err:
            legend_entries.append((line_thy, "Standard Model", "L"))

    # fill hep data
    if hep_data:
        # scan value as independent variable
        scan_values = sorted(set(chain(*(map(float, d["values"][poi]) for d in data))))
        hdt.create_independent_variable(
            poi_data[poi].label,
            parent=hep_data,
            values=[Number(v, default_format=-2) for v in scan_values],
        )

        # dnll2 values as dependent variables
        for d, g_nll in zip(data, g_nlls):
            label = r"$-2\Delta\log(L)$"
            if d.get("name"):
                label += ", " + d["name"]
            hdt.create_dependent_variable_from_graph(
                g_nll,
                x_values=scan_values,
                parent=hep_data,
                label=label,
                rounding_method=-2,
                # clamp interpolated dnll2 values at 0 for the hep data record
                transform=(lambda i, x, y, err: (x, max(y, 0.0), err)),
            )

    # legend
    legend_cols = min(int(math.ceil(len(legend_entries) / 4.0)), 3)
    legend_rows = int(math.ceil(len(legend_entries) / float(legend_cols)))
    legend = r.routines.create_legend(
        pad=pad,
        width=legend_cols * 210,
        n=legend_rows,
        props={"NColumns": legend_cols, "TextSize": 18},
    )
    r.fill_legend(legend, legend_entries)
    draw_objs.append(legend)
    legend_box = r.routines.create_legend_box(
        legend,
        pad,
        "trl",
        props={"LineWidth": 0, "FillColor": colors.white_trans_70},
    )
    # insert the box right below the legend so it is drawn behind it
    draw_objs.insert(-1, legend_box)

    # cms label
    cms_labels = r.routines.create_cms_labels(
        pad=pad,
        postfix=cms_postfix or "",
        layout="outside_horizontal",
    )
    draw_objs.extend(cms_labels)

    # model parameter labels
    if model_parameters:
        param_kwargs = {}
        if legend_cols == 3:
            param_kwargs["y_offset"] = 1.0 - 0.25 * pad.GetTopMargin() - legend.GetY1()
        draw_objs.extend(create_model_parameters(model_parameters, pad, **param_kwargs))

    # campaign label
    if campaign:
        campaign_label = to_root_latex(campaign_labels.get(campaign, campaign))
        campaign_label = r.routines.create_top_right_label(campaign_label, pad=pad)
        draw_objs.append(campaign_label)

    # draw all objects
    r.routines.draw_objects(draw_objs)

    # save
    r.update_canvas(canvas)
    for path in make_list(paths):
        canvas.SaveAs(path)

    # save parameter ranges
    if ranges_path:
        ranges_path = prepare_output(ranges_path)
        with open(ranges_path, "w") as f:
            json.dump(parameter_ranges, f, indent=4)
        print("saved parameter ranges to {}".format(ranges_path))

    # save hep data
    if hep_data_path:
        hdt.save_hep_data(hep_data, hep_data_path)
@use_style("dhi_default")
def plot_likelihood_scan_2d(
Loading