Commit e2c25d17 authored by Marcel Rieger's avatar Marcel Rieger
Browse files

Merge branch 'feature/save_exclusion_ranges' into 'master'

[UpperLimits] add parameter to save the exclusion ranges in an additional output file

See merge request !47
parents f463e03d f7aa67a3
Pipeline #3476158 skipped with stage
......@@ -4,6 +4,8 @@
Limit plots using ROOT.
"""
import os
import json
import math
import traceback
......@@ -43,6 +45,7 @@ def plot_limit_scan(
y_max=None,
xsec_unit=None,
hh_process=None,
ranges_path=None,
model_parameters=None,
campaign=None,
show_points=False,
......@@ -63,11 +66,13 @@ def plot_limit_scan(
when *None*, as a ratio over the theory prediction. *hh_process* can be the name of a HH
subprocess configured in *dhi.config.br_hh_names* and is inserted to the process name
in the title of the y-axis and indicates that the plotted cross section data was (e.g.) scaled
by a branching ratio. *model_parameters* can be a dictionary of key-value pairs of model
parameters. *campaign* should refer to the name of a campaign label defined in
*dhi.config.campaign_labels*. When *show_points* is *True*, the central scan points are drawn
on top of the interpolated curve. When *paper* is *True*, certain plot configurations are
adjusted for use in publications.
by a branching ratio.
When *ranges_path* is set, allowed scan parameter ranges are saved to the given file.
*model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should
refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When
*show_points* is *True*, the central scan points are drawn on top of the interpolated curve.
When *paper* is *True*, certain plot configurations are adjusted for use in publications.
Example: https://cms-hh.web.cern.ch/tools/inference/tasks/limits.html#limit-on-poi-vs-scan-parameter
"""
......@@ -92,6 +97,10 @@ def plot_limit_scan(
if theory_values is not None:
theory_values = check_values(theory_values, ["xsec"])
has_thy_err = "xsec_p1" in theory_values and "xsec_m1" in theory_values
if ranges_path:
ranges_path = os.path.expandvars(os.path.expanduser(ranges_path))
if not os.path.exists(os.path.dirname(ranges_path)):
os.makedirs(os.path.dirname(ranges_path))
# set default ranges
if x_min is None:
......@@ -161,12 +170,17 @@ def plot_limit_scan(
y_min_value = min(y_min_value, min(expected_values["limit"]))
# print the expected excluded ranges
print_excluded_ranges(scan_parameter, poi + " expected",
excl_ranges = evaluate_excluded_ranges(scan_parameter, poi + " expected",
expected_values[scan_parameter],
expected_values["limit"],
theory_values[scan_parameter] if has_thy else None,
theory_values["xsec"] if has_thy else None,
)
allowed_ranges = {}
if ranges_path:
key = "__".join([scan_parameter, poi, "expected"])
allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges,
expected_values[scan_parameter].min(), expected_values[scan_parameter].max())
# observed values
if observed_values is not None:
......@@ -176,13 +190,23 @@ def plot_limit_scan(
legend_entries[0] = (g_inj, "Observed", "L")
y_max_value = max(y_max_value, max(observed_values["limit"]))
y_min_value = min(y_min_value, min(observed_values["limit"]))
# print the observed excluded ranges
print_excluded_ranges(scan_parameter, poi + " observed",
excl_ranges = evaluate_excluded_ranges(scan_parameter, poi + " observed",
observed_values[scan_parameter],
observed_values["limit"],
theory_values[scan_parameter] if has_thy else None,
theory_values["xsec"] if has_thy else None,
)
if ranges_path:
key = "__".join([scan_parameter, poi, "observed"])
allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges,
observed_values[scan_parameter].min(), observed_values[scan_parameter].max())
if ranges_path:
with open(ranges_path, "w") as f:
json.dump(allowed_ranges, f, indent=4)
print("saved allowed ranges to file")
# get theory prediction limits
if has_thy:
......@@ -282,6 +306,7 @@ def plot_limit_scans(
y_max=None,
xsec_unit=None,
hh_process=None,
ranges_path=None,
model_parameters=None,
campaign=None,
show_points=True,
......@@ -304,10 +329,13 @@ def plot_limit_scans(
when *None*, as a ratio over the theory prediction. *hh_process* can be the name of a HH
subprocess configured in *dhi.config.br_hh_names* and is inserted to the process name in the
title of the y-axis and indicates that the plotted cross section data was (e.g.) scaled by a
branching ratio. *model_parameters* can be a dictionary of key-value pairs of model parameters.
*campaign* should refer to the name of a campaign label defined in dhi.config.campaign_labels.
When *show_points* is *True*, the central scan points are drawn on top of the interpolated
curve. When *paper* is *True*, certain plot configurations are adjusted for use in publications.
branching ratio.
When *ranges_path* is set, all allowed scan parameter ranges are saved to the given file.
*model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should
refer to the name of a campaign label defined in dhi.config.campaign_labels. When *show_points*
is *True*, the central scan points are drawn on top of the interpolated curve. When *paper* is
*True*, certain plot configurations are adjusted for use in publications.
Example: https://cms-hh.web.cern.ch/tools/inference/tasks/limits.html#multiple-limits-on-poi-vs-scan-parameter
"""
......@@ -345,6 +373,11 @@ def plot_limit_scans(
assert scan_parameter in theory_values
assert "xsec" in theory_values
has_thy_err = "xsec_p1" in theory_values and "xsec_m1" in theory_values
if ranges_path:
ranges_path = os.path.expandvars(os.path.expanduser(ranges_path))
if not os.path.exists(os.path.dirname(ranges_path)):
os.makedirs(os.path.dirname(ranges_path))
allowed_ranges = {}
# set default ranges
if x_min is None:
......@@ -378,8 +411,6 @@ def plot_limit_scans(
# central values
for i, (ev, name, col, ms) in enumerate(zip(expected_values, names, _color_sequence[:n_graphs],
marker_sequence[:n_graphs])):
# name = names[n_graphs - i - 1]
# expected graph
mask = ~np.isnan(ev["limit"])
limit_values = ev["limit"][mask]
......@@ -397,12 +428,16 @@ def plot_limit_scans(
y_min_value = min(y_min_value, min(limit_values))
# print expected excluded ranges
print_excluded_ranges(scan_parameter, "{}, {}, expected".format(poi, name),
excl_ranges = evaluate_excluded_ranges(scan_parameter, "{}, {}, expected".format(poi, name),
scan_values,
limit_values,
theory_values[scan_parameter] if has_thy else None,
theory_values["xsec"] if has_thy else None,
)
if ranges_path:
key = "__".join([scan_parameter, poi, "expected", name])
allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges,
scan_values.min(), scan_values.max())
# observed graph
if has_obs:
......@@ -424,12 +459,21 @@ def plot_limit_scans(
y_min_value = min(y_min_value, min(obs_limit_values))
# print observed excluded ranges
print_excluded_ranges(scan_parameter, "{}, {}, observed".format(poi, name),
excl_ranges = evaluate_excluded_ranges(scan_parameter, "{}, {}, observed".format(poi, name),
obs_scan_values,
obs_limit_values,
theory_values[scan_parameter] if has_thy else None,
theory_values["xsec"] if has_thy else None,
)
if ranges_path:
key = "__".join([scan_parameter, poi, "observed", name])
allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges,
obs_scan_values.min(), obs_scan_values.max())
if ranges_path:
with open(ranges_path, "w") as f:
json.dump(allowed_ranges, f, indent=4)
print("saved allowed ranges to file")
# add additional legend entries to distinguish expected and observed lines
if has_obs:
......@@ -1341,12 +1385,12 @@ def evaluate_limit_scan_1d(scan_values, limit_values, xsec_scan_values=None, xse
)
def print_excluded_ranges(param_name, scan_name, scan_values, limit_values, xsec_scan_values=None,
xsec_values=None, interpolation="linear"):
def evaluate_excluded_ranges(param_name, scan_name, scan_values, limit_values,
xsec_scan_values=None, xsec_values=None, interpolation="linear"):
# more than 5 points are required
if len(scan_values) <= 5:
print("insufficient number of scan points for extracting excluded ranges")
return
return None
try:
ranges = evaluate_limit_scan_1d(
......@@ -1359,10 +1403,13 @@ def print_excluded_ranges(param_name, scan_name, scan_values, limit_values, xsec
except Exception:
print("1D limit scan evaluation failed")
traceback.print_exc()
return
return None
# print them
_print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation)
return ranges
def _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation):
# helper to check if the granularity of scan values is too small at a certain point
......@@ -1413,3 +1460,39 @@ def _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpola
for start, stop in ranges:
print(" - " + format_range(start, stop))
print("")
def excluded_to_allowed_ranges(excluded_ranges, min_value, max_value):
"""
Converts a list *ranges* of 2-tuples with edges referring to excluded ranges to allowed ones,
taking into account the endpoints given by *min_value* and *max_value*. An open range is denoted
by a *None*. This, (*None*, *None*) would mean that the entire range is allowed.
"""
# shorthands
e_ranges = excluded_ranges
a_ranges = []
# excluded_ranges might mark open ends with None's as well, so before linearizing values,
# replace them with the global endpoints
if e_ranges:
if e_ranges[0][0] is None:
e_ranges[0] = (min_value, e_ranges[0][0])
if e_ranges[-1][1] is None:
e_ranges[-1] = (e_ranges[-1][0], max_value)
# linearize excluded ranges (assuming edges are always reasonably far apart)
e_edges = sum(map(list, excluded_ranges), [])
# loop through adjacent pairs and create allowed ranges in an alternating fashion
for i, (start, stop) in enumerate(zip(e_edges[:-1], e_edges[1:])):
# first range
if i == 0 and start > min_value:
a_ranges.append((min_value, start))
# last range
if i == len(e_edges) - 2 and stop < max_value:
a_ranges.append((stop, max_value))
# intermediate ranges
if i % 2 != 0:
a_ranges.append((start, stop))
return a_ranges
......@@ -193,6 +193,10 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
significant=False,
description="show points of central limit values; default: False",
)
save_ranges = luigi.BoolParameter(
default=False,
description="save allowed parameter ranges in an additional output; default: False",
)
z_min = None
z_max = None
......@@ -229,6 +233,8 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
]
def output(self):
outputs = {}
# additional postfix
parts = []
if self.xsec in ["pb", "fb"]:
......@@ -239,7 +245,13 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
parts.append("log")
names = self.create_plot_names(["limits", self.get_output_postfix(), parts])
return [self.local_target(name) for name in names]
outputs["plots"] = [self.local_target(name) for name in names]
if self.save_ranges:
outputs["ranges"] = self.local_target("ranges_{}.json".format(
self.get_output_postfix()))
return outputs
@law.decorator.log
@law.decorator.notify
......@@ -250,7 +262,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
# prepare the output
outputs = self.output()
outputs[0].parent.touch()
outputs["plots"][0].parent.touch()
# load limit values
limit_values = self.load_scan_data(self.input())
......@@ -315,7 +327,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
# call the plot function
self.call_plot_func(
"dhi.plots.limits.plot_limit_scan",
paths=[outp.path for outp in outputs],
paths=[outp.path for outp in outputs["plots"]],
poi=self.poi,
scan_parameter=self.scan_parameter,
expected_values=limit_values,
......@@ -328,6 +340,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
y_log=self.y_log,
xsec_unit=xsec_unit,
hh_process=self.br if xsec_unit and self.br in br_hh else None,
ranges_path=outputs["ranges"].path if self.save_ranges else None,
model_parameters=self.get_shown_parameters(),
campaign=self.campaign if self.campaign != law.NO_STR else None,
show_points=self.show_points,
......@@ -370,6 +383,8 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask):
]
def output(self):
outputs = {}
# additional postfix
parts = []
if self.xsec in ["pb", "fb"]:
......@@ -380,7 +395,13 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask):
parts.append("log")
names = self.create_plot_names(["multilimits", self.get_output_postfix(), parts])
return [self.local_target(name) for name in names]
outputs["plots"] = [self.local_target(name) for name in names]
if self.save_ranges:
outputs["ranges"] = self.local_target("ranges_{}.json".format(
self.get_output_postfix()))
return outputs
@law.decorator.log
@law.decorator.notify
......@@ -391,7 +412,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask):
# prepare the output
outputs = self.output()
outputs[0].parent.touch()
outputs["plots"][0].parent.touch()
# load limit values
limit_values = []
......@@ -461,7 +482,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask):
# call the plot function
self.call_plot_func(
"dhi.plots.limits.plot_limit_scans",
paths=[outp.path for outp in outputs],
paths=[outp.path for outp in outputs["plots"]],
poi=self.poi,
scan_parameter=self.scan_parameter,
names=names,
......@@ -475,6 +496,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask):
y_log=self.y_log,
xsec_unit=xsec_unit,
hh_process=self.br if xsec_unit and self.br in br_hh else None,
ranges_path=outputs["ranges"].path if self.save_ranges else None,
model_parameters=self.get_shown_parameters(),
campaign=self.campaign if self.campaign != law.NO_STR else None,
show_points=self.show_points,
......@@ -496,6 +518,8 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask):
]
def output(self):
outputs = {}
# additional postfix
parts = []
if self.xsec in ["pb", "fb"]:
......@@ -506,7 +530,13 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask):
parts.append("log")
names = self.create_plot_names(["multilimitsbymodel", self.get_output_postfix(), parts])
return [self.local_target(name) for name in names]
outputs["plots"] = [self.local_target(name) for name in names]
if self.save_ranges:
outputs["ranges"] = self.local_target("ranges_{}.json".format(
self.get_output_postfix()))
return outputs
@law.decorator.log
@law.decorator.notify
......@@ -517,7 +547,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask):
# prepare the output
outputs = self.output()
outputs[0].parent.touch()
outputs["plots"][0].parent.touch()
# load limit values
limit_values = []
......@@ -595,7 +625,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask):
# call the plot function
self.call_plot_func(
"dhi.plots.limits.plot_limit_scans",
paths=[outp.path for outp in outputs],
paths=[outp.path for outp in outputs["plots"]],
poi=self.poi,
scan_parameter=self.scan_parameter,
names=names,
......@@ -609,6 +639,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask):
y_log=self.y_log,
xsec_unit=xsec_unit,
hh_process=self.br if xsec_unit and self.br in br_hh else None,
ranges_path=outputs["ranges"].path if self.save_ranges else None,
model_parameters=self.get_shown_parameters(),
campaign=self.campaign if self.campaign != law.NO_STR else None,
show_points=self.show_points,
......
......@@ -78,3 +78,4 @@
| `--show-significances` | Comma-separated significances in units of gaussian standard deviations when integer and >= 0, or confidence levels when float and < 1 to overlay with lines and labels. Defaults to `1,2,3,5`. |
| `--shift-negative-values` | When `True`, dnll2 values are vertically shifted to move the minimum back to zero in case it is negative. Defaults to `False`. |
| `--signal-from-limit` | When `True`, instead of using the best fit value, scale the signal by the upper limit as computed by `UpperLimits`. Defaults to `False`. |
| `--save-ranges` | When `True`, save allowed parameter ranges in an additional output file. Defaults to `False`. |
......@@ -2,6 +2,6 @@ The `PlotMultipleUpperLimits` task collects the fit results from multiple `Merge
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,74,10,11,3,4,22,23,5,6,7,8,9"
--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,74,10,11,3,4,22,23,5,6,7,8,9,81"
</div>
......@@ -3,6 +3,6 @@ It provides some handy CLI parameters to manipulate the visualisation.
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,10,11,3,4,5,6,7,8,9"
--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,10,11,3,4,5,6,7,8,9,81"
</div>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment