diff --git a/dhi/plots/limits.py b/dhi/plots/limits.py index 5e0ec09a55c95399a974edddf43105bc03a3bbb1..16f4514647de8aad108a3e62171aac560bbe468e 100644 --- a/dhi/plots/limits.py +++ b/dhi/plots/limits.py @@ -4,6 +4,8 @@ Limit plots using ROOT. """ +import os +import json import math import traceback @@ -43,6 +45,7 @@ def plot_limit_scan( y_max=None, xsec_unit=None, hh_process=None, + ranges_path=None, model_parameters=None, campaign=None, show_points=False, @@ -63,11 +66,13 @@ def plot_limit_scan( when *None*, as a ratio over the theory prediction. *hh_process* can be the name of a HH subprocess configured in *dhi.config.br_hh_names* and is inserted to the process name in the title of the y-axis and indicates that the plotted cross section data was (e.g.) scaled - by a branching ratio. *model_parameters* can be a dictionary of key-value pairs of model - parameters. *campaign* should refer to the name of a campaign label defined in - *dhi.config.campaign_labels*. When *show_points* is *True*, the central scan points are drawn - on top of the interpolated curve. When *paper* is *True*, certain plot configurations are - adjusted for use in publications. + by a branching ratio. + + When *ranges_path* is set, allowed scan parameter ranges are saved to the given file. + *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should + refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When + *show_points* is *True*, the central scan points are drawn on top of the interpolated curve. + When *paper* is *True*, certain plot configurations are adjusted for use in publications. Example: https://cms-hh.web.cern.ch/tools/inference/tasks/limits.html#limit-on-poi-vs-scan-parameter """ @@ -92,6 +97,10 @@ def plot_limit_scan( if theory_values is not None: theory_values = check_values(theory_values, ["xsec"]) has_thy_err = "xsec_p1" in theory_values and "xsec_m1" in theory_values + if ranges_path: + ranges_path = os.path.expandvars(os.path.expanduser(ranges_path)) + if not os.path.exists(os.path.dirname(ranges_path)): + os.makedirs(os.path.dirname(ranges_path)) # set default ranges if x_min is None: @@ -161,12 +170,17 @@ def plot_limit_scan( y_min_value = min(y_min_value, min(expected_values["limit"])) # print the expected excluded ranges - print_excluded_ranges(scan_parameter, poi + " expected", + excl_ranges = evaluate_excluded_ranges(scan_parameter, poi + " expected", expected_values[scan_parameter], expected_values["limit"], theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) + allowed_ranges = {} + if ranges_path: + key = "__".join([scan_parameter, poi, "expected"]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + expected_values[scan_parameter].min(), expected_values[scan_parameter].max()) # observed values if observed_values is not None: @@ -176,13 +190,23 @@ def plot_limit_scan( legend_entries[0] = (g_inj, "Observed", "L") y_max_value = max(y_max_value, max(observed_values["limit"])) y_min_value = min(y_min_value, min(observed_values["limit"])) + # print the observed excluded ranges - print_excluded_ranges(scan_parameter, poi + " observed", + excl_ranges = evaluate_excluded_ranges(scan_parameter, poi + " observed", observed_values[scan_parameter], observed_values["limit"], theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) + if ranges_path: + key = "__".join([scan_parameter, poi, "observed"]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + observed_values[scan_parameter].min(), observed_values[scan_parameter].max()) + + if ranges_path: + with open(ranges_path, "w") as f: + json.dump(allowed_ranges, f, indent=4) + print("saved allowed ranges to file") # get theory prediction limits if has_thy: @@ -282,6 +306,7 @@ def plot_limit_scans( y_max=None, xsec_unit=None, hh_process=None, + ranges_path=None, model_parameters=None, campaign=None, show_points=True, @@ -304,10 +329,13 @@ def plot_limit_scans( when *None*, as a ratio over the theory prediction. *hh_process* can be the name of a HH subprocess configured in *dhi.config.br_hh_names* and is inserted to the process name in the title of the y-axis and indicates that the plotted cross section data was (e.g.) scaled by a - branching ratio. *model_parameters* can be a dictionary of key-value pairs of model parameters. - *campaign* should refer to the name of a campaign label defined in dhi.config.campaign_labels. - When *show_points* is *True*, the central scan points are drawn on top of the interpolated - curve. When *paper* is *True*, certain plot configurations are adjusted for use in publications. + branching ratio. + + When *ranges_path* is set, all allowed scan parameter ranges are saved to the given file. + *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should + refer to the name of a campaign label defined in dhi.config.campaign_labels. When *show_points* + is *True*, the central scan points are drawn on top of the interpolated curve. When *paper* is + *True*, certain plot configurations are adjusted for use in publications. Example: https://cms-hh.web.cern.ch/tools/inference/tasks/limits.html#multiple-limits-on-poi-vs-scan-parameter """ @@ -345,6 +373,11 @@ def plot_limit_scans( assert scan_parameter in theory_values assert "xsec" in theory_values has_thy_err = "xsec_p1" in theory_values and "xsec_m1" in theory_values + if ranges_path: + ranges_path = os.path.expandvars(os.path.expanduser(ranges_path)) + if not os.path.exists(os.path.dirname(ranges_path)): + os.makedirs(os.path.dirname(ranges_path)) + allowed_ranges = {} # set default ranges if x_min is None: @@ -378,8 +411,6 @@ def plot_limit_scans( # central values for i, (ev, name, col, ms) in enumerate(zip(expected_values, names, _color_sequence[:n_graphs], marker_sequence[:n_graphs])): - # name = names[n_graphs - i - 1] - # expected graph mask = ~np.isnan(ev["limit"]) limit_values = ev["limit"][mask] @@ -397,12 +428,16 @@ def plot_limit_scans( y_min_value = min(y_min_value, min(limit_values)) # print expected excluded ranges - print_excluded_ranges(scan_parameter, "{}, {}, expected".format(poi, name), + excl_ranges = evaluate_excluded_ranges(scan_parameter, "{}, {}, expected".format(poi, name), scan_values, limit_values, theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) + if ranges_path: + key = "__".join([scan_parameter, poi, "expected", name]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + scan_values.min(), scan_values.max()) # observed graph if has_obs: @@ -424,12 +459,21 @@ def plot_limit_scans( y_min_value = min(y_min_value, min(obs_limit_values)) # print observed excluded ranges - print_excluded_ranges(scan_parameter, "{}, {}, observed".format(poi, name), + excl_ranges = evaluate_excluded_ranges(scan_parameter, "{}, {}, observed".format(poi, name), obs_scan_values, obs_limit_values, theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) + if ranges_path: + key = "__".join([scan_parameter, poi, "observed", name]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + obs_scan_values.min(), obs_scan_values.max()) + + if ranges_path: + with open(ranges_path, "w") as f: + json.dump(allowed_ranges, f, indent=4) + print("saved allowed ranges to file") # add additional legend entries to distinguish expected and observed lines if has_obs: @@ -1341,12 +1385,12 @@ def evaluate_limit_scan_1d(scan_values, limit_values, xsec_scan_values=None, xse ) -def print_excluded_ranges(param_name, scan_name, scan_values, limit_values, xsec_scan_values=None, - xsec_values=None, interpolation="linear"): +def evaluate_excluded_ranges(param_name, scan_name, scan_values, limit_values, + xsec_scan_values=None, xsec_values=None, interpolation="linear"): # more than 5 points are required if len(scan_values) <= 5: print("insufficient number of scan points for extracting excluded ranges") - return + return None try: ranges = evaluate_limit_scan_1d( @@ -1359,10 +1403,13 @@ def print_excluded_ranges(param_name, scan_name, scan_values, limit_values, xsec except Exception: print("1D limit scan evaluation failed") traceback.print_exc() - return + return None + # print them _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation) + return ranges + def _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation): # helper to check if the granularity of scan values is too small at a certain point @@ -1413,3 +1460,39 @@ def _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpola for start, stop in ranges: print(" - " + format_range(start, stop)) print("") + + +def excluded_to_allowed_ranges(excluded_ranges, min_value, max_value): + """ + Converts a list *ranges* of 2-tuples with edges referring to excluded ranges to allowed ones, + taking into account the endpoints given by *min_value* and *max_value*. An open range is denoted + by a *None*. This, (*None*, *None*) would mean that the entire range is allowed. + """ + # shorthands + e_ranges = excluded_ranges + a_ranges = [] + + # excluded_ranges might mark open ends with None's as well, so before linearizing values, + # replace them with the global endpoints + if e_ranges: + if e_ranges[0][0] is None: + e_ranges[0] = (min_value, e_ranges[0][0]) + if e_ranges[-1][1] is None: + e_ranges[-1] = (e_ranges[-1][0], max_value) + + # linearize excluded ranges (assuming edges are always reasonably far apart) + e_edges = sum(map(list, excluded_ranges), []) + + # loop through adjacent pairs and create allowed ranges in an alternating fashion + for i, (start, stop) in enumerate(zip(e_edges[:-1], e_edges[1:])): + # first range + if i == 0 and start > min_value: + a_ranges.append((min_value, start)) + # last range + if i == len(e_edges) - 2 and stop < max_value: + a_ranges.append((stop, max_value)) + # intermediate ranges + if i % 2 != 0: + a_ranges.append((start, stop)) + + return a_ranges diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index fcba284678a032f044e7b4f928746a1c3732c159..dc0f73955bc275c1a0fa605792d88a16c2b592dc 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -193,6 +193,10 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): significant=False, description="show points of central limit values; default: False", ) + save_ranges = luigi.BoolParameter( + default=False, + description="save allowed parameter ranges in an additional output; default: False", + ) z_min = None z_max = None @@ -229,6 +233,8 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): ] def output(self): + outputs = {} + # additional postfix parts = [] if self.xsec in ["pb", "fb"]: @@ -239,7 +245,13 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): parts.append("log") names = self.create_plot_names(["limits", self.get_output_postfix(), parts]) - return [self.local_target(name) for name in names] + outputs["plots"] = [self.local_target(name) for name in names] + + if self.save_ranges: + outputs["ranges"] = self.local_target("ranges_{}.json".format( + self.get_output_postfix())) + + return outputs @law.decorator.log @law.decorator.notify @@ -250,7 +262,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): # prepare the output outputs = self.output() - outputs[0].parent.touch() + outputs["plots"][0].parent.touch() # load limit values limit_values = self.load_scan_data(self.input()) @@ -315,7 +327,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): # call the plot function self.call_plot_func( "dhi.plots.limits.plot_limit_scan", - paths=[outp.path for outp in outputs], + paths=[outp.path for outp in outputs["plots"]], poi=self.poi, scan_parameter=self.scan_parameter, expected_values=limit_values, @@ -328,6 +340,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): y_log=self.y_log, xsec_unit=xsec_unit, hh_process=self.br if xsec_unit and self.br in br_hh else None, + ranges_path=outputs["ranges"].path if self.save_ranges else None, model_parameters=self.get_shown_parameters(), campaign=self.campaign if self.campaign != law.NO_STR else None, show_points=self.show_points, @@ -370,6 +383,8 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): ] def output(self): + outputs = {} + # additional postfix parts = [] if self.xsec in ["pb", "fb"]: @@ -380,7 +395,13 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): parts.append("log") names = self.create_plot_names(["multilimits", self.get_output_postfix(), parts]) - return [self.local_target(name) for name in names] + outputs["plots"] = [self.local_target(name) for name in names] + + if self.save_ranges: + outputs["ranges"] = self.local_target("ranges_{}.json".format( + self.get_output_postfix())) + + return outputs @law.decorator.log @law.decorator.notify @@ -391,7 +412,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): # prepare the output outputs = self.output() - outputs[0].parent.touch() + outputs["plots"][0].parent.touch() # load limit values limit_values = [] @@ -461,7 +482,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): # call the plot function self.call_plot_func( "dhi.plots.limits.plot_limit_scans", - paths=[outp.path for outp in outputs], + paths=[outp.path for outp in outputs["plots"]], poi=self.poi, scan_parameter=self.scan_parameter, names=names, @@ -475,6 +496,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): y_log=self.y_log, xsec_unit=xsec_unit, hh_process=self.br if xsec_unit and self.br in br_hh else None, + ranges_path=outputs["ranges"].path if self.save_ranges else None, model_parameters=self.get_shown_parameters(), campaign=self.campaign if self.campaign != law.NO_STR else None, show_points=self.show_points, @@ -496,6 +518,8 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): ] def output(self): + outputs = {} + # additional postfix parts = [] if self.xsec in ["pb", "fb"]: @@ -506,7 +530,13 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): parts.append("log") names = self.create_plot_names(["multilimitsbymodel", self.get_output_postfix(), parts]) - return [self.local_target(name) for name in names] + outputs["plots"] = [self.local_target(name) for name in names] + + if self.save_ranges: + outputs["ranges"] = self.local_target("ranges_{}.json".format( + self.get_output_postfix())) + + return outputs @law.decorator.log @law.decorator.notify @@ -517,7 +547,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): # prepare the output outputs = self.output() - outputs[0].parent.touch() + outputs["plots"][0].parent.touch() # load limit values limit_values = [] @@ -595,7 +625,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): # call the plot function self.call_plot_func( "dhi.plots.limits.plot_limit_scans", - paths=[outp.path for outp in outputs], + paths=[outp.path for outp in outputs["plots"]], poi=self.poi, scan_parameter=self.scan_parameter, names=names, @@ -609,6 +639,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): y_log=self.y_log, xsec_unit=xsec_unit, hh_process=self.br if xsec_unit and self.br in br_hh else None, + ranges_path=outputs["ranges"].path if self.save_ranges else None, model_parameters=self.get_shown_parameters(), campaign=self.campaign if self.campaign != law.NO_STR else None, show_points=self.show_points, diff --git a/docs/content/snippets/parameters.md b/docs/content/snippets/parameters.md index 3a7f8653bf61de19f9427d9e4304eb03f040797c..191925fb8d6dd3b4cadac7c6ccd80f732a8b3d4a 100644 --- a/docs/content/snippets/parameters.md +++ b/docs/content/snippets/parameters.md @@ -78,3 +78,4 @@ | `--show-significances` | Comma-separated significances in units of gaussian standard deviations when integer and >= 0, or confidence levels when float and < 1 to overlay with lines and labels. Defaults to `1,2,3,5`. | | `--shift-negative-values` | When `True`, dnll2 values are vertically shifted to move the minimum back to zero in case it is negative. Defaults to `False`. | | `--signal-from-limit` | When `True`, instead of using the best fit value, scale the signal by the upper limit as computed by `UpperLimits`. Defaults to `False`. | +| `--save-ranges` | When `True`, save allowed parameter ranges in an additional output file. Defaults to `False`. | diff --git a/docs/content/snippets/plotmultipleupperlimits_param_tab.md b/docs/content/snippets/plotmultipleupperlimits_param_tab.md index 63fe295d94676e2685296003022842b8c1cd9ace..b7e71aabbd2d4fbf4972a0f6f41d126c16523f71 100644 --- a/docs/content/snippets/plotmultipleupperlimits_param_tab.md +++ b/docs/content/snippets/plotmultipleupperlimits_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleUpperLimits` task collects the fit results from multiple `Merge <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,74,10,11,3,4,22,23,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,74,10,11,3,4,22,23,5,6,7,8,9,81" </div> diff --git a/docs/content/snippets/plotupperlimits_param_tab.md b/docs/content/snippets/plotupperlimits_param_tab.md index 88e7fda117cda2337aea1ff6141fb11db4dc321e..453af1f36aace7398d8403d35f604db459554146 100644 --- a/docs/content/snippets/plotupperlimits_param_tab.md +++ b/docs/content/snippets/plotupperlimits_param_tab.md @@ -3,6 +3,6 @@ It provides some handy CLI parameters to manipulate the visualisation. <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,10,11,3,4,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,10,11,3,4,5,6,7,8,9,81" </div>