diff --git a/dhi/plots/limits.py b/dhi/plots/limits.py index 7b31279758c38891fd81c62b837e79fcfca57a4c..7ffa65eb9a59a53c2ef40d8591d01666983f0aca 100644 --- a/dhi/plots/limits.py +++ b/dhi/plots/limits.py @@ -4,6 +4,7 @@ Limit plots using ROOT. """ +import os import json import math import traceback @@ -44,11 +45,11 @@ def plot_limit_scan( y_max=None, xsec_unit=None, hh_process=None, + ranges_path=None, model_parameters=None, campaign=None, show_points=False, paper=False, - save_exclusion_ranges=None, ): """ Creates a plot for the upper limit scan of a *poi* over a *scan_parameter* and saves it at @@ -65,12 +66,13 @@ def plot_limit_scan( when *None*, as a ratio over the theory prediction. *hh_process* can be the name of a HH subprocess configured in *dhi.config.br_hh_names* and is inserted to the process name in the title of the y-axis and indicates that the plotted cross section data was (e.g.) scaled - by a branching ratio. *model_parameters* can be a dictionary of key-value pairs of model - parameters. *campaign* should refer to the name of a campaign label defined in - *dhi.config.campaign_labels*. When *show_points* is *True*, the central scan points are drawn - on top of the interpolated curve. When *paper* is *True*, certain plot configurations are - adjusted for use in publications. When *save_exclusion_ranges* is set, the exclusion range is - saved to the given file. + by a branching ratio. + + When *ranges_path* is set, allowed scan parameter ranges are saved to the given file. + *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should + refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When + *show_points* is *True*, the central scan points are drawn on top of the interpolated curve. + When *paper* is *True*, certain plot configurations are adjusted for use in publications. Example: https://cms-hh.web.cern.ch/tools/inference/tasks/limits.html#limit-on-poi-vs-scan-parameter """ @@ -95,6 +97,10 @@ def plot_limit_scan( if theory_values is not None: theory_values = check_values(theory_values, ["xsec"]) has_thy_err = "xsec_p1" in theory_values and "xsec_m1" in theory_values + if ranges_path: + ranges_path = os.path.expandvars(os.path.expanduser(ranges_path)) + if not os.path.exists(os.path.dirname(ranges_path)): + os.makedirs(os.path.dirname(ranges_path)) # set default ranges if x_min is None: @@ -164,22 +170,17 @@ def plot_limit_scan( y_min_value = min(y_min_value, min(expected_values["limit"])) # print the expected excluded ranges - print_excluded_ranges(scan_parameter, poi + " expected", + excl_ranges = evaluate_excluded_ranges(scan_parameter, poi + " expected", expected_values[scan_parameter], expected_values["limit"], theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) - if save_exclusion_ranges is not None: - assert isinstance(save_exclusion_ranges, str) - excluded_ranges = excluded_ranges_to_json(scan_parameter, poi + " expected", - expected_values[scan_parameter], - expected_values["limit"], - theory_values[scan_parameter] if has_thy else None, - theory_values["xsec"] if has_thy else None, - ) - with open(save_exclusion_ranges, "a") as f: - f.write(json.dumps(excluded_ranges, indent=4)) + allowed_ranges = {} + if ranges_path: + key = "__".join([scan_parameter, poi, "expected"]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + expected_values[scan_parameter].min(), expected_values[scan_parameter].max()) # observed values if observed_values is not None: @@ -189,23 +190,23 @@ def plot_limit_scan( legend_entries[0] = (g_inj, "Observed", "L") y_max_value = max(y_max_value, max(observed_values["limit"])) y_min_value = min(y_min_value, min(observed_values["limit"])) + # print the observed excluded ranges - print_excluded_ranges(scan_parameter, poi + " observed", + excl_ranges = evaluate_excluded_ranges(scan_parameter, poi + " observed", observed_values[scan_parameter], observed_values["limit"], theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) - if save_exclusion_ranges is not None: - assert isinstance(save_exclusion_ranges, str) - excluded_ranges = excluded_ranges_to_json(scan_parameter, poi + " expected", - expected_values[scan_parameter], - expected_values["limit"], - theory_values[scan_parameter] if has_thy else None, - theory_values["xsec"] if has_thy else None, - ) - with open(save_exclusion_ranges, "a") as f: - f.write(json.dumps(excluded_ranges, indent=4)) + if ranges_path: + key = "__".join([scan_parameter, poi, "observed"]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + observed_values[scan_parameter].min(), observed_values[scan_parameter].max()) + + if ranges_path: + with open(ranges_path, "w") as f: + json.dump(allowed_ranges, f, indent=4) + print("saved allowed ranges to file") # get theory prediction limits if has_thy: @@ -299,11 +300,11 @@ def plot_limit_scans( y_max=None, xsec_unit=None, hh_process=None, + ranges_path=None, model_parameters=None, campaign=None, show_points=True, paper=False, - save_exclusion_ranges=None, ): """ Creates a plot showing multiple upper limit scans of a *poi* over a *scan_parameter* and saves @@ -322,11 +323,13 @@ def plot_limit_scans( when *None*, as a ratio over the theory prediction. *hh_process* can be the name of a HH subprocess configured in *dhi.config.br_hh_names* and is inserted to the process name in the title of the y-axis and indicates that the plotted cross section data was (e.g.) scaled by a - branching ratio. *model_parameters* can be a dictionary of key-value pairs of model parameters. - *campaign* should refer to the name of a campaign label defined in dhi.config.campaign_labels. - When *show_points* is *True*, the central scan points are drawn on top of the interpolated - curve. When *paper* is *True*, certain plot configurations are adjusted for use in publications. - When *save_exclusion_ranges* is set, the exclusion ranges are saved to the given file. + branching ratio. + + When *ranges_path* is set, all allowed scan parameter ranges are saved to the given file. + *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should + refer to the name of a campaign label defined in dhi.config.campaign_labels. When *show_points* + is *True*, the central scan points are drawn on top of the interpolated curve. When *paper* is + *True*, certain plot configurations are adjusted for use in publications. Example: https://cms-hh.web.cern.ch/tools/inference/tasks/limits.html#multiple-limits-on-poi-vs-scan-parameter """ @@ -364,6 +367,11 @@ def plot_limit_scans( assert scan_parameter in theory_values assert "xsec" in theory_values has_thy_err = "xsec_p1" in theory_values and "xsec_m1" in theory_values + if ranges_path: + ranges_path = os.path.expandvars(os.path.expanduser(ranges_path)) + if not os.path.exists(os.path.dirname(ranges_path)): + os.makedirs(os.path.dirname(ranges_path)) + allowed_ranges = {} # set default ranges if x_min is None: @@ -394,13 +402,9 @@ def plot_limit_scans( if all(name in br_hh_colors.root for name in names): _color_sequence = [br_hh_colors.root[name] for name in names] - - multi_exclusion_ranges = {} # central values for i, (ev, name, col, ms) in enumerate(zip(expected_values, names, _color_sequence[:n_graphs], marker_sequence[:n_graphs])): - # name = names[n_graphs - i - 1] - # expected graph mask = ~np.isnan(ev["limit"]) limit_values = ev["limit"][mask] @@ -418,23 +422,16 @@ def plot_limit_scans( y_min_value = min(y_min_value, min(limit_values)) # print expected excluded ranges - print_excluded_ranges(scan_parameter, "{}, {}, expected".format(poi, name), + excl_ranges = evaluate_excluded_ranges(scan_parameter, "{}, {}, expected".format(poi, name), scan_values, limit_values, theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) - if save_exclusion_ranges is not None: - assert isinstance(save_exclusion_ranges, str) - multi_exclusion_ranges.update( - excluded_ranges_to_json(scan_parameter, "{}, {}, expected".format(poi, name), - scan_values, - limit_values, - theory_values[scan_parameter] if has_thy else None, - theory_values["xsec"] if has_thy else None, - custom_name=name, - ) - ) + if ranges_path: + key = "__".join([scan_parameter, poi, "expected", name]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + scan_values.min(), scan_values.max()) # observed graph if has_obs: @@ -456,27 +453,21 @@ def plot_limit_scans( y_min_value = min(y_min_value, min(obs_limit_values)) # print observed excluded ranges - print_excluded_ranges(scan_parameter, "{}, {}, observed".format(poi, name), + excl_ranges = evaluate_excluded_ranges(scan_parameter, "{}, {}, observed".format(poi, name), obs_scan_values, obs_limit_values, theory_values[scan_parameter] if has_thy else None, theory_values["xsec"] if has_thy else None, ) - if save_exclusion_ranges is not None: - assert isinstance(save_exclusion_ranges, str) - multi_exclusion_ranges.update( - excluded_ranges_to_json(scan_parameter, "{}, {}, observed".format(poi, name), - obs_scan_values, - obs_limit_values, - theory_values[scan_parameter] if has_thy else None, - theory_values["xsec"] if has_thy else None, - custom_name=name, - ) - ) - - if save_exclusion_ranges is not None: - with open(save_exclusion_ranges, "a") as f: - f.write(json.dumps(multi_exclusion_ranges, indent=4)) + if ranges_path: + key = "__".join([scan_parameter, poi, "observed", name]) + allowed_ranges[key] = excluded_to_allowed_ranges(excl_ranges, + obs_scan_values.min(), obs_scan_values.max()) + + if ranges_path: + with open(ranges_path, "w") as f: + json.dump(allowed_ranges, f, indent=4) + print("saved allowed ranges to file") # add additional legend entries to distinguish expected and observed lines if has_obs: @@ -1379,11 +1370,12 @@ def evaluate_limit_scan_1d(scan_values, limit_values, xsec_scan_values=None, xse ) -def get_excluded_ranges(scan_values, limit_values, xsec_scan_values=None, xsec_values=None, interpolation="linear"): +def evaluate_excluded_ranges(param_name, scan_name, scan_values, limit_values, + xsec_scan_values=None, xsec_values=None, interpolation="linear"): # more than 5 points are required if len(scan_values) <= 5: print("insufficient number of scan points for extracting excluded ranges") - return + return None try: ranges = evaluate_limit_scan_1d( @@ -1396,15 +1388,12 @@ def get_excluded_ranges(scan_values, limit_values, xsec_scan_values=None, xsec_v except Exception: print("1D limit scan evaluation failed") traceback.print_exc() - return - return ranges + return None + # print them + _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation) -def print_excluded_ranges(param_name, scan_name, scan_values, limit_values, xsec_scan_values=None, - xsec_values=None, interpolation="linear"): - ranges = get_excluded_ranges(scan_values, limit_values, xsec_scan_values=xsec_scan_values, xsec_values=xsec_values, interpolation=interpolation) - if ranges is not None: - _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation) + return ranges def _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpolation): @@ -1458,20 +1447,37 @@ def _print_excluded_ranges(param_name, scan_name, scan_values, ranges, interpola print("") -def excluded_ranges_to_json(param_name, scan_name, scan_values, limit_values, xsec_scan_values=None, - xsec_values=None, interpolation="linear", custom_name=""): - ranges = get_excluded_ranges(scan_values, limit_values, xsec_scan_values=xsec_scan_values, xsec_values=xsec_values, interpolation=interpolation) - key = custom_name or param_name - if ranges is not None: - for start, stop in ranges: - if len(ranges) == 2: - ret = (ranges[0][1], ranges[1][0]) - else: - assert len(ranges) == 1 - if start == scan_values.min() or start == scan_values.max(): - ret = (stop,) - else: - ret = (start,) - else: - ret = (np.nan, np.nan) - return {key: ret} +def excluded_to_allowed_ranges(excluded_ranges, min_value, max_value): + """ + Converts a list *ranges* of 2-tuples with edges referring to excluded ranges to allowed ones, + taking into account the endpoints given by *min_value* and *max_value*. An open range is denoted + by a *None*. This, (*None*, *None*) would mean that the entire range is allowed. + """ + # shorthands + e_ranges = excluded_ranges + a_ranges = [] + + # excluded_ranges might mark open ends with None's as well, so before linearizing values, + # replace them with the global endpoints + if e_ranges: + if e_ranges[0][0] is None: + e_ranges[0] = (min_value, e_ranges[0][0]) + if e_ranges[-1][1] is None: + e_ranges[-1] = (e_ranges[-1][0], max_value) + + # linearize excluded ranges (assuming edges are always reasonably far apart) + e_edges = sum(map(list, excluded_ranges), []) + + # loop through adjacent pairs and create allowed ranges in an alternating fashion + for i, (start, stop) in enumerate(zip(e_edges[:-1], e_edges[1:])): + # first range + if i == 0 and start > min_value: + a_ranges.append((min_value, start)) + # last range + if i == len(e_edges) - 2 and stop < max_value: + a_ranges.append((stop, max_value)) + # intermediate ranges + if i % 2 != 0: + a_ranges.append((start, stop)) + + return a_ranges diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index c78ffae3736519113232307e85c108ece1b59fa9..dc0f73955bc275c1a0fa605792d88a16c2b592dc 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -193,7 +193,10 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): significant=False, description="show points of central limit values; default: False", ) - save_exclusion_ranges = luigi.BoolParameter(default=False, description="save exclusion ranges in an additional output; default: False") + save_ranges = luigi.BoolParameter( + default=False, + description="save allowed parameter ranges in an additional output; default: False", + ) z_min = None z_max = None @@ -231,6 +234,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): def output(self): outputs = {} + # additional postfix parts = [] if self.xsec in ["pb", "fb"]: @@ -242,8 +246,11 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): names = self.create_plot_names(["limits", self.get_output_postfix(), parts]) outputs["plots"] = [self.local_target(name) for name in names] - if self.save_exclusion_ranges: - outputs["exclusion_ranges"] = self.local_target("exclusion_ranges_{}.json".format(self.get_output_postfix())) + + if self.save_ranges: + outputs["ranges"] = self.local_target("ranges_{}.json".format( + self.get_output_postfix())) + return outputs @law.decorator.log @@ -254,8 +261,8 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): import numpy as np # prepare the output - outputs = self.output()["plots"] - outputs[0].parent.touch() + outputs = self.output() + outputs["plots"][0].parent.touch() # load limit values limit_values = self.load_scan_data(self.input()) @@ -320,7 +327,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): # call the plot function self.call_plot_func( "dhi.plots.limits.plot_limit_scan", - paths=[outp.path for outp in outputs], + paths=[outp.path for outp in outputs["plots"]], poi=self.poi, scan_parameter=self.scan_parameter, expected_values=limit_values, @@ -333,11 +340,11 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): y_log=self.y_log, xsec_unit=xsec_unit, hh_process=self.br if xsec_unit and self.br in br_hh else None, + ranges_path=outputs["ranges"].path if self.save_ranges else None, model_parameters=self.get_shown_parameters(), campaign=self.campaign if self.campaign != law.NO_STR else None, show_points=self.show_points, paper=self.paper, - save_exclusion_ranges=self.output()["exclusion_ranges"].path if self.save_exclusion_ranges else None, ) def load_scan_data(self, inputs): @@ -377,6 +384,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): def output(self): outputs = {} + # additional postfix parts = [] if self.xsec in ["pb", "fb"]: @@ -388,8 +396,11 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): names = self.create_plot_names(["multilimits", self.get_output_postfix(), parts]) outputs["plots"] = [self.local_target(name) for name in names] - if self.save_exclusion_ranges: - outputs["exclusion_ranges"] = self.local_target("exclusion_ranges_{}.json".format(self.get_output_postfix())) + + if self.save_ranges: + outputs["ranges"] = self.local_target("ranges_{}.json".format( + self.get_output_postfix())) + return outputs @law.decorator.log @@ -400,8 +411,8 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): import numpy as np # prepare the output - outputs = self.output()["plots"] - outputs[0].parent.touch() + outputs = self.output() + outputs["plots"][0].parent.touch() # load limit values limit_values = [] @@ -471,7 +482,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): # call the plot function self.call_plot_func( "dhi.plots.limits.plot_limit_scans", - paths=[outp.path for outp in outputs], + paths=[outp.path for outp in outputs["plots"]], poi=self.poi, scan_parameter=self.scan_parameter, names=names, @@ -485,11 +496,11 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): y_log=self.y_log, xsec_unit=xsec_unit, hh_process=self.br if xsec_unit and self.br in br_hh else None, + ranges_path=outputs["ranges"].path if self.save_ranges else None, model_parameters=self.get_shown_parameters(), campaign=self.campaign if self.campaign != law.NO_STR else None, show_points=self.show_points, paper=self.paper, - save_exclusion_ranges=self.output()["exclusion_ranges"].path if self.save_exclusion_ranges else None, ) @@ -507,6 +518,8 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): ] def output(self): + outputs = {} + # additional postfix parts = [] if self.xsec in ["pb", "fb"]: @@ -517,7 +530,13 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): parts.append("log") names = self.create_plot_names(["multilimitsbymodel", self.get_output_postfix(), parts]) - return [self.local_target(name) for name in names] + outputs["plots"] = [self.local_target(name) for name in names] + + if self.save_ranges: + outputs["ranges"] = self.local_target("ranges_{}.json".format( + self.get_output_postfix())) + + return outputs @law.decorator.log @law.decorator.notify @@ -528,7 +547,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): # prepare the output outputs = self.output() - outputs[0].parent.touch() + outputs["plots"][0].parent.touch() # load limit values limit_values = [] @@ -606,7 +625,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): # call the plot function self.call_plot_func( "dhi.plots.limits.plot_limit_scans", - paths=[outp.path for outp in outputs], + paths=[outp.path for outp in outputs["plots"]], poi=self.poi, scan_parameter=self.scan_parameter, names=names, @@ -620,6 +639,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): y_log=self.y_log, xsec_unit=xsec_unit, hh_process=self.br if xsec_unit and self.br in br_hh else None, + ranges_path=outputs["ranges"].path if self.save_ranges else None, model_parameters=self.get_shown_parameters(), campaign=self.campaign if self.campaign != law.NO_STR else None, show_points=self.show_points, diff --git a/docs/content/snippets/parameters.md b/docs/content/snippets/parameters.md index ea6cec6bb646a00124bca2b84da55f5370332a8b..191925fb8d6dd3b4cadac7c6ccd80f732a8b3d4a 100644 --- a/docs/content/snippets/parameters.md +++ b/docs/content/snippets/parameters.md @@ -78,4 +78,4 @@ | `--show-significances` | Comma-separated significances in units of gaussian standard deviations when integer and >= 0, or confidence levels when float and < 1 to overlay with lines and labels. Defaults to `1,2,3,5`. | | `--shift-negative-values` | When `True`, dnll2 values are vertically shifted to move the minimum back to zero in case it is negative. Defaults to `False`. | | `--signal-from-limit` | When `True`, instead of using the best fit value, scale the signal by the upper limit as computed by `UpperLimits`. Defaults to `False`. | -| `--save-exclusion-ranges` | When `True`, save exclusion ranges in an additional output file. Defaults to `False`. | +| `--save-ranges` | When `True`, save allowed parameter ranges in an additional output file. Defaults to `False`. |