diff --git a/README.md b/README.md index 1976798c00aa5541a48a14b0f98eaf0f0d8cfb55..eb877bcea9c8333f2f40eea8b3d4dd2fd79795d4 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,9 @@ module 'dhi.tasks.base', 3 task(s): - BundleRepo - BundleSoftware +module 'dhi.tasks.snapshot', 1 task(s): + - Snapshot + module 'dhi.tasks.limits', 7 task(s): - UpperLimits - PlotUpperLimitsAtPoint @@ -154,7 +157,7 @@ module 'dhi.tasks.exclusion', 2 task(s): - PlotExclusionAndBestFit - PlotExclusionAndBestFit2D -written 44 task(s) to index file '/your/path/inference/.law/index' +written 45 task(s) to index file '/your/path/inference/.law/index' ``` You can type diff --git a/dhi/config.py b/dhi/config.py index 33562128e678f2d04df3badd8d04ada14a34dba1..698ac305232ec844a9979210ae700b83c667fd1d 100644 --- a/dhi/config.py +++ b/dhi/config.py @@ -159,7 +159,7 @@ colors = DotDict( # color sequence for plots with multiple elements color_sequence = ["blue", "red", "green", "grey", "pink", "cyan", "orange", "light_green", "yellow"] -color_sequence += 10 * ["black"] +color_sequence += 10 * ["grey"] # marker sequence for plots with multiple elements marker_sequence = [20, 21, 22, 23, 24, 25, 26, 32, 27, 33, 28, 34, 29, 30] diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index 0952705eafc4afae5740751d05e440aefba5df9c..c525dad42dae766ad1bbee9b2dd2b4180f6dfeab 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -21,7 +21,7 @@ from dhi.util import ( ) from dhi.plots.util import ( use_style, create_model_parameters, fill_hist_from_points, get_contours, get_y_range, - infer_binning_from_grid, get_contour_box, + infer_binning_from_grid, get_contour_box, make_parameter_label_map, ) @@ -49,10 +49,10 @@ def plot_likelihood_scan_1d( paper=False, ): """ - Creates a likelihood plot of the 1D scan of a *poi* and saves it at *paths*. *values* should be a - mapping to lists of values or a record array with keys "" and "dnll2". 
*theory_value* - can be a 3-tuple denoting the nominal theory prediction of the POI and its up and down - uncertainties which is drawn as a vertical bar. + Creates a likelihood plot of the 1D scan of a *poi* and saves it at *paths*. *values* should be + a mapping to lists of values or a record array with keys "" and "dnll2". + *theory_value* can be a 3-tuple denoting the nominal theory prediction of the POI and its up and + down uncertainties which is drawn as a vertical bar. When *poi_min* is set, it should be the value of the poi that leads to the best likelihood. Otherwise, it is estimated from the interpolated curve. When *show_best_fit* @@ -119,9 +119,9 @@ def plot_likelihood_scan_1d( r.setup_line(line, props={"LineColor": colors.black, "LineStyle": 2, "NDC": False}) draw_objs.append(line) - # lines at chi2_1 intervals + # horizontal lines at chi2_1 intervals for n in [chi2_levels[1][1], chi2_levels[1][2]]: - if n < y_max_line: + if y_min <= n <= y_max_line: line = ROOT.TLine(x_min, n, x_max, n) r.setup_line(line, props={"LineColor": colors.black, "LineStyle": 2, "NDC": False}) draw_objs.append(line) @@ -144,8 +144,8 @@ def plot_likelihood_scan_1d( if not has_thy_err: legend_entries.append((line_thy, "Theory prediction", "L")) - # line for best fit value - if show_best_fit and scan: + # vertical line for best fit value + if show_best_fit and scan and (x_min <= scan.poi_min <= x_max): line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line) r.setup_line(line_fit, props={"LineWidth": 2, "NDC": False}, color=colors.black) draw_objs.append(line_fit) @@ -251,8 +251,12 @@ def plot_likelihood_scans_1d( d.setdefault("poi_min", None) # default name d.setdefault("name", str(i + 1)) - # keep only valid points - values = {k: np.array(v, dtype=np.float32) for k, v in values.items()} + # drop all fields except for required ones + values = { + k: np.array(v, dtype=np.float32) + for k, v in values.items() + if k in [poi, "dnll2"] + } # preprocess values (nan 
detection, negative shift) values["dnll2"], values[poi] = _preprocess_values(values["dnll2"], (poi, values[poi]), shift_negative_values=shift_negative_values, origin="entry '{}'".format(d["name"]), @@ -290,9 +294,9 @@ def plot_likelihood_scans_1d( r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": y_min, "Maximum": y_max}) draw_objs.append((h_dummy, "HIST")) - # lines at chi2_1 intervals + # horizontal lines at chi2_1 intervals for n in [chi2_levels[1][1], chi2_levels[1][2]]: - if n < y_max: + if y_min <= n <= y_max_line: line = ROOT.TLine(x_min, n, x_max, n) r.setup_line(line, props={"LineColor": colors.black, "LineStyle": 2, "NDC": False}) draw_objs.append(line) @@ -315,8 +319,8 @@ def plot_likelihood_scans_1d( legend_entries.insert(0, (g_nll, to_root_latex(br_hh_names.get(d["name"], d["name"])), "LP" if show_points else "L")) - # line for best fit value - if show_best_fit and scan: + # vertical line for best fit value + if show_best_fit and scan and (x_min <= scan.poi_min <= x_max): line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line) r.setup_line(line_fit, props={"LineWidth": 2, "NDC": False}, color=colors[col]) draw_objs.append(line_fit) @@ -437,6 +441,8 @@ def plot_likelihood_scan_2d( assert poi1 in _values assert poi2 in _values assert "dnll2" in _values + # drop all fields except for required ones + _values = {k: v for k, v in _values.items() if k in [poi1, poi2, "dnll2"]} # preprocess values (nan detection, negative shift) _values["dnll2"], _values[poi1], _values[poi2] = _preprocess_values(_values["dnll2"], (poi1, _values[poi1]), (poi2, _values[poi2]), remove_nans=interpolate_nans, @@ -671,7 +677,12 @@ def plot_likelihood_scans_2d( assert len(d["poi_mins"]) == 2 # default name d.setdefault("name", str(i + 1)) - values = {k: np.array(v, dtype=np.float32) for k, v in values.items()} + # drop all fields except for required ones and convert to arrays + values = { + k: np.array(v, dtype=np.float32) + for k, v in values.items() + if 
k in [poi1, poi2, "dnll2"] + } # preprocess values (nan detection, negative shift) values["dnll2"], values[poi1], values[poi2] = _preprocess_values(values["dnll2"], (poi1, values[poi1]), (poi2, values[poi2]), shift_negative_values=shift_negative_values, @@ -806,7 +817,9 @@ def plot_nuisance_likelihood_scans( only_parameters=None, parameters_per_page=1, sort_max=False, - scan_points=101, + show_diff=False, + labels=None, + scan_points=401, x_min=-2., x_max=2, y_min=None, @@ -816,21 +829,32 @@ def plot_nuisance_likelihood_scans( campaign=None, paper=False, ): - """ - Creates a plot showing the change of the negative log-likelihood, obtained *poi*, when varying - values of nuisance paramaters and saves it at *paths*. The calculation of the likelihood change - requires the RooFit *workspace* to read the model config, a RooDataSet *dataset* to construct - the functional likelihood, and the output file *fit_diagnostics_path* of the combine fit - diagnostics for reading pre- and post-fit parameters for the fit named *fit_name*, defaulting - to ``"fit_s"``. + r""" + Creates a plot showing the change of the negative log-likelihood, previously obtained for a + *poi*, when varying values of nuisance paramaters and saves it at *paths*. The calculation of + the likelihood change requires the RooFit *workspace* to read the model config, a RooDataSet + *dataset* to construct the functional likelihood, and the output file *fit_diagnostics_path* of + the combine fit diagnostics for reading pre- and post-fit parameters for the fit named + *fit_name*, defaulting to ``"fit_s"``. Nuisances to skip, or to show exclusively can be configured via *skip_parameters* and *only_parameters*, respectively, which can be lists of patterns. *parameters_per_page* defines the number of parameter curves that are drawn in the same canvas page. When *sort_max* is - *True*, the parameter are sorted by their highest likelihood change in the full scan range. 
The - scan range and granularity is set via *scan_points*, *x_min* and *x_max*. *y_min* and *y_max* - define the range of the y-axis, which is plotted with a logarithmic scale when *y_log* is - *True*. *model_parameters* can be a dictionary of key-value pairs of model parameters. + *True*, the parameter are sorted by their highest likelihood change in the full scan range. + By default, the x-axis shows absolute variations of the nuisance parameters (in terms of the + prefit range). When *show_diff* is *True*, it shows differences with respect to the best fit + value instead. + + *labels* should be a dictionary or a json file containing a dictionary that maps nuisances names + to labels shown in the plot, a python file containing a function named "rename_nuisance", or a + function itself. When it is a dictionary and a key starts with "^" and ends with "$" it is + interpreted as a regular expression. Matched groups can be reused in the substituted name via + '\n' to reference the n-th group (following the common re.sub format). When it is a function, it + should accept the current nuisance label as a single argument and return the new value. + + The scan range and granularity is set via *scan_points*, *x_min* and *x_max*. *y_min* and + *y_max* define the range of the y-axis, which is plotted with a logarithmic scale when *y_log* + is *True*. *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When *paper* is *True*, certain plot configurations are adjusted for use in publications. 
@@ -868,8 +892,10 @@ def plot_nuisance_likelihood_scans( param_names.append(param_name) print("preparing scans of {} parameter(s)".format(len(param_names))) - # prepare the scan values, ensure that 0 is contained - scan_values = np.linspace(x_min, x_max, scan_points).tolist() + # prepare the scan values, extend the range by 10 points in each directon, ensure 0 is contained + assert scan_points > 1 + width = float(x_max - x_min) / (scan_points - 1) + scan_values = np.linspace(x_min - 10 * width, x_max + 10 * width, scan_points + 20).tolist() if 0 not in scan_values: scan_values = sorted(scan_values + [0.]) @@ -886,8 +912,9 @@ def plot_nuisance_likelihood_scans( x_values, y_values = [], [] print("scanning parameter {}".format(name)) for x in scan_values: - param.setVal(param_bf + (pre_u if x >= 0 else -pre_d) * x) - x_values.append(param.getVal()) + x_diff = x * (pre_u if x >= 0 else -pre_d) + param.setVal(param_bf + x_diff) + x_values.append(x_diff if show_diff else (param_bf + x_diff)) y_values.append(2 * (nll.getVal() - nll_base)) curve_data[name] = (x_values, y_values) @@ -907,6 +934,9 @@ def plot_nuisance_likelihood_scans( else: param_groups.append([name]) + # prepare labels + labels = make_parameter_label_map(param_names, labels) + # go through nuisances canvas = None for names in param_groups: @@ -930,8 +960,11 @@ def plot_nuisance_likelihood_scans( y_max, log=y_log) # dummy histogram to control axes - x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}" - y_title = "Change in -2 log(L)" + if show_diff: + x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}" + else: + x_title = "#theta / #Delta#theta_{pre}" + y_title = "-2 #Delta log(L)" h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max) r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": _y_min, "Maximum": _y_max}) draw_objs = [(h_dummy, "HIST")] @@ -956,7 +989,8 @@ def plot_nuisance_likelihood_scans( g_nll = create_tgraph(len(x), x, y) r.setup_graph(g_nll, 
props={"LineWidth": 2, "LineStyle": 1}, color=colors[col]) draw_objs.append((g_nll, "SAME,C")) - legend_entries.append((g_nll, to_root_latex(name), "L")) + label = to_root_latex(labels.get(name, name)) + legend_entries.append((g_nll, label, "L")) # legend legend_cols = min(int(math.ceil(len(legend_entries) / 4.)), 3) @@ -1121,7 +1155,8 @@ def evaluate_likelihood_scan_1d(poi_values, dnll2_values, poi_min=None): # first, obtain an interpolation function # interp = scipy.interpolate.interp1d(poi_values, dnll2_values, kind="linear") try: - interp = scipy.interpolate.interp1d(poi_values, dnll2_values, kind="cubic") + interp = scipy.interpolate.interp1d(poi_values, dnll2_values, kind="cubic", + fill_value="extrapolate") except: return None @@ -1141,8 +1176,8 @@ def evaluate_likelihood_scan_1d(poi_values, dnll2_values, poi_min=None): # compare and optionally issue a warning (threshold to be optimized) if abs(poi_min - poi_min_new) >= 0.03: warn( - "WARNING: external POI minimum (from combine) {:.4f} differs from the " - "recomputed value (from scipy.interpolate and scipy.minimize) {:.4f}".format( + "WARNING: external POI minimum {:.4f} (from combine) differs from the " + "recomputed value {:.4f} (from scipy.interpolate and scipy.minimize)".format( poi_min, poi_min_new) ) else: @@ -1275,7 +1310,7 @@ def evaluate_likelihood_scan_2d( raise Exception("could not find minimum of nll2 interpolation: {}".format(res.message)) else: poi1_min_new = res.x[0] - poi2_min_new = res.x[0] + poi2_min_new = res.x[1] print("done, found {:.4f}, {:.4f}".format(poi1_min_new, poi2_min_new)) if xcheck: # compare and optionally issue a warning (threshold to be optimized) diff --git a/dhi/plots/pulls_impacts.py b/dhi/plots/pulls_impacts.py index 32312d11c5ebe0252ee35b16f4e1cb2c1600af61..9afaaf9c784703c9d256d5a9dbd6f2ee47413b93 100644 --- a/dhi/plots/pulls_impacts.py +++ b/dhi/plots/pulls_impacts.py @@ -6,7 +6,6 @@ Pull and impact plots using ROOT. 
import os import json -import re import math import array @@ -15,9 +14,9 @@ import numpy as np from dhi.config import poi_data, campaign_labels, colors, cms_postfix from dhi.util import ( - import_ROOT, import_file, multi_match, to_root_latex, linspace, colored, make_list, + import_ROOT, multi_match, to_root_latex, linspace, colored, make_list, ) -from dhi.plots.util import use_style +from dhi.plots.util import use_style, make_parameter_label_map colors = colors.root @@ -143,42 +142,7 @@ def plot_pulls_impacts( params = [params[i] for i in indices] # prepare labels - if isinstance(labels, six.string_types): - labels = os.path.expandvars(os.path.expanduser(labels)) - # try to load a renaming function called "rename_nuisance" - if labels.endswith(".py"): - labels = import_file(labels, attr="rename_nuisance") - if not callable(labels): - raise Exception("rename_nuisance loaded from {} is not callable".format(labels)) - else: - with open(labels, "r") as f: - labels = json.load(f) - elif not labels: - labels = {} - - if not isinstance(labels, dict): - # labels is a renaming function, call it for all parameters and store the result when - # names changed - _labels = {} - for param in params: - new_name = labels(param.name) - if new_name and new_name != param.name: - _labels[param.name] = new_name - labels = _labels - else: - # expand regular expressions through eager interpolation using parameter names - for k, v in labels.items(): - if not k.startswith("^") or not k.endswith("$"): - continue - for param in params: - # skip explicit translations, effectively giving them priority - if param.name in labels: - continue - # apply the pattern - new_name = re.sub(k, v, param.name) - # store a translation label when the name has changed - if new_name != param.name: - labels[param.name] = new_name + labels = make_parameter_label_map([param.name for param in params], labels) # determine the number of pages if parameters_per_page < 1: diff --git a/dhi/plots/util.py 
b/dhi/plots/util.py index 9f26d3e086529b7779d588a487e45f1e2a4df327..133c20811436953f28a476b16b16cdc7c0ae6f12 100644 --- a/dhi/plots/util.py +++ b/dhi/plots/util.py @@ -4,9 +4,12 @@ Different helpers and ROOT style configurations to be used with plotlib. """ +import os +import re import math import array import uuid +import json import functools import itertools import contextlib @@ -17,7 +20,7 @@ import numpy as np import scipy.interpolate from dhi.config import poi_data, br_hh_names -from dhi.util import import_ROOT, try_int, to_root_latex, make_list +from dhi.util import import_ROOT, import_file, try_int, to_root_latex, make_list _styles = {} @@ -131,6 +134,48 @@ def determine_limit_digits(limit, is_xsec=False): return 0 +def make_parameter_label_map(parameter_names, labels=None): + # prepare labels + if isinstance(labels, six.string_types): + labels = os.path.expandvars(os.path.expanduser(labels)) + # try to load a renaming function called "rename_nuisance" + if labels.endswith(".py"): + labels = import_file(labels, attr="rename_nuisance") + if not callable(labels): + raise Exception("rename_nuisance loaded from {} is not callable".format(labels)) + else: + with open(labels, "r") as f: + labels = json.load(f) + elif not labels: + labels = {} + + if not isinstance(labels, dict): + # labels is a renaming function, call it for all parameters and store the result when + # names changed + _labels = {} + for name in parameter_names: + new_name = labels(name) + if new_name: + _labels[name] = new_name + labels = _labels + else: + # expand regular expressions through eager interpolation using parameter names + for k, v in labels.items(): + if not k.startswith("^") or not k.endswith("$"): + continue + for name in parameter_names: + # skip explicit translations, effectively giving them priority + if name in labels: + continue + # apply the pattern + new_name = re.sub(k, v, name) + # store a translation label when set + if new_name: + labels[name] = new_name + + return 
labels + + def get_y_range(y_min_value, y_max_value, y_min=None, y_max=None, log=False, y_min_log=1e-3, top_margin=0.38, visible_margin=0.4): if log: @@ -388,6 +433,21 @@ def _get_contour(hist, level): return contours +def get_contour_box(graphs): + assert graphs + + x_values, y_values = [], [] + for g in graphs: + x, y = get_graph_points(g, errors=False) + x_values.extend(x) + y_values.extend(y) + + if not x_values or not y_values: + return None, None, None, None + + return min(x_values), max(x_values), min(y_values), max(y_values) + + def get_graph_points(g, errors=False): ROOT = import_ROOT() @@ -425,21 +485,6 @@ def get_graph_points(g, errors=False): return x_values, y_values -def get_contour_box(graphs): - assert graphs - - x_values, y_values = [], [] - for g in graphs: - x, y = get_graph_points(g, errors=False) - x_values.extend(x) - y_values.extend(y) - - if not x_values or not y_values: - return None, None, None, None - - return min(x_values), max(x_values), min(y_values), max(y_values) - - def repeat_graph(g, n): points = sum((n * [p] for p in zip(*get_graph_points(g))), []) diff --git a/dhi/scripts/plot_datacard_shapes.py b/dhi/scripts/plot_datacard_shapes.py index d1e261934e0b4d3ff7db03e7bd2eebd98f1d9057..ef515833c77ed7552b5800dabeb5fe661773be1c 100755 --- a/dhi/scripts/plot_datacard_shapes.py +++ b/dhi/scripts/plot_datacard_shapes.py @@ -43,9 +43,9 @@ from dhi.config import colors logger = create_console_logger(os.path.splitext(os.path.basename(__file__))[0]) -def plot_datacard_shapes(datacard, rules, stack=False, directory=".", - nom_format="{bin}__{process}.pdf", syst_format="{bin}__{process}__{syst}.pdf", mass="125", - **plot_kwargs): +def plot_datacard_shapes(datacard, rules, stack=False, directory=".", file_type="pdf", + nom_format="{bin}__{process}.{file_type}", + syst_format="{bin}__{process}__{syst}.{file_type}", mass="125", **plot_kwargs): """ Reads a *datacard* and plots its histogram shapes according to certain *rules*. 
A rule should be a tuple consisting of a datacard bin, a datacard process and an optional name of a systematic @@ -280,23 +280,24 @@ def plot_datacard_shapes(datacard, rules, stack=False, directory=".", continue # draw the shape - create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, nom_format, - syst_format, **plot_kwargs) + create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, file_type, + nom_format, syst_format, **plot_kwargs) @use_style("dhi_default") -def create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, nom_format, syst_format, - binning="original", x_title="Datacard shape", y_min=None, y_max=None, y_min2=None, - y_max2=None, y_log=False, campaign_label=None): +def create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, file_type, nom_format, + syst_format, binning="original", x_title="Datacard shape", y_min=None, y_max=None, + y_min2=None, y_max2=None, y_log=False, campaign_label=None): import plotlib.root as r ROOT = import_ROOT() # check if systematic shifts are to be plotted, determine the plot path plot_syst = param is not None and any(d is not None for _, d, _ in proc_shapes.values()) if plot_syst: - path = syst_format.format(bin=bin_name, process=proc_label, syst=param["name"]) + path = syst_format.format(bin=bin_name, process=proc_label, syst=param["name"], + file_type=file_type) else: - path = nom_format.format(bin=bin_name, process=proc_label) + path = nom_format.format(bin=bin_name, process=proc_label, file_type=file_type) path = path.replace("*", "X").replace("?", "Y").replace("!", "N") path = os.path.join(directory, path) logger.debug("going to create plot at {} for shapes in bin {} and process {}, stacking {} " @@ -384,6 +385,7 @@ def create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, nom_f r.setup_pad(pad2, props={"TopMargin": 0.7, "Gridy": 1}) else: canvas, (pad1,) = r.routines.create_canvas() + r.setup_pad(pad1, props={"Logy": y_log}) pad1.cd() 
draw_objs1 = [] draw_objs2 = [] @@ -566,10 +568,13 @@ if __name__ == "__main__": "plots per process machted by a rule, stack distributions and create a single plot") parser.add_argument("--directory", "-d", default=".", help="directory in which produced plots " "are saved; defaults to the current directory") - parser.add_argument("--nom-format", default="{bin}__{process}.pdf", help="format for created " - "files when creating only nominal shapes; default: {bin}__{process}.pdf") - parser.add_argument("--syst-format", default="{bin}__{process}__{syst}.pdf", help="format for " - "created files when creating systematic shapes; default: {bin}__{process}__{syst}.pdf") + parser.add_argument("--file-type", "-f", default="pdf", help="the file type to produce; " + "default: pdf") + parser.add_argument("--nom-format", default="{bin}__{process}.{file_type}", help="format for " + "created files when creating only nominal shapes; default: {bin}__{process}.{file_type}") + parser.add_argument("--syst-format", default="{bin}__{process}__{syst}.{file_type}", + help="format for created files when creating systematic shapes; default: " + "{bin}__{process}__{syst}.{file_type}") parser.add_argument("--mass", "-m", default="125", help="mass hypothesis; default: 125") parser.add_argument("--binning", "-b", default="original", choices=["original", "numbers", "numbers_width"], help="the binning strategy; 'original': use original bin edges; " @@ -594,6 +599,7 @@ if __name__ == "__main__": # run the renaming with patch_object(logger, "name", args.log_name): plot_datacard_shapes(args.input, args.rules, stack=args.stack, directory=args.directory, - nom_format=args.nom_format, syst_format=args.syst_format, mass=args.mass, - binning=args.binning, x_title=args.x_title, y_min=args.y_min, y_max=args.y_max, - y_min2=args.y_min2, y_max2=args.y_max2, y_log=args.y_log, campaign_label=args.campaign) + file_type=args.file_type, nom_format=args.nom_format, syst_format=args.syst_format, + mass=args.mass, 
binning=args.binning, x_title=args.x_title, y_min=args.y_min, + y_max=args.y_max, y_min2=args.y_min2, y_max2=args.y_max2, y_log=args.y_log, + campaign_label=args.campaign) diff --git a/dhi/tasks/__init__.py b/dhi/tasks/__init__.py index 711c61ad393b2195e491c0c6c79349ff0e00d947..20d1b25d3ce62d8ba332db148b6fb410b1ff70f5 100644 --- a/dhi/tasks/__init__.py +++ b/dhi/tasks/__init__.py @@ -4,6 +4,7 @@ # import all task modules import dhi.tasks.base import dhi.tasks.combine +import dhi.tasks.snapshot import dhi.tasks.limits import dhi.tasks.likelihoods import dhi.tasks.significances diff --git a/dhi/tasks/base.py b/dhi/tasks/base.py index 76d0948b32d07ea1fded127e8ee22d325ff31830..328b95f781b31baa2565864220c12724478c9a73 100644 --- a/dhi/tasks/base.py +++ b/dhi/tasks/base.py @@ -60,8 +60,16 @@ class BaseTask(law.Task): cmd = dep.get_command() if cmd: - cmd = law.util.quote_cmd(cmd) if isinstance(cmd, (list, tuple)) else cmd - text += law.util.colored(cmd, "cyan") + # when cmd is a 2-tuple, i.e. 
the real command and a representation for printing + # pick the second one + if isinstance(cmd, tuple) and len(cmd) == 2: + cmd = cmd[1] + else: + if isinstance(cmd, list): + cmd = law.util.quote_cmd(cmd) + # defaut highlighting + cmd = law.util.colored(cmd, "cyan") + text += cmd else: text += law.util.colored("empty", "red") print(offset + text) @@ -452,10 +460,14 @@ class CommandTask(AnalysisTask): parent.touch() handled_parent_uris.add(parent.uri()) - def run_command(self, cmd, optional=False, **kwargs): + def run_command(self, cmd, highlighted_cmd=None, optional=False, **kwargs): # proper command encoding cmd = (law.util.quote_cmd(cmd) if isinstance(cmd, (list, tuple)) else cmd).strip() + # default highlighted command + if not highlighted_cmd: + highlighted_cmd = law.util.colored(cmd, "cyan") + # when no cwd was set and run_command_in_tmp is True, create a tmp dir if "cwd" not in kwargs and self.run_command_in_tmp: tmp_dir = law.LocalDirectoryTarget(is_tmp=True) @@ -464,7 +476,7 @@ class CommandTask(AnalysisTask): self.publish_message("cwd: {}".format(kwargs.get("cwd", os.getcwd()))) # call it - with self.publish_step("running '{}' ...".format(law.util.colored(cmd, "cyan"))): + with self.publish_step("running '{}' ...".format(highlighted_cmd)): p, lines = law.util.readable_popen(cmd, shell=True, executable="/bin/bash", **kwargs) for line in lines: print(line) @@ -487,6 +499,9 @@ class CommandTask(AnalysisTask): # get the command cmd = self.get_command() + if isinstance(cmd, tuple) and len(cmd) == 2: + kwargs["highlighted_cmd"] = cmd[1] + cmd = cmd[0] # run it self.run_command(cmd, **kwargs) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index 2a6306a6ebea34ddaeeb51e7a61acd20d2ac2362..8c04e5a3bf4cf3eac37ed72c845a010eb9c611e0 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -41,17 +41,21 @@ class HHModelTask(AnalysisTask): provides a few convenience functions for working with it. 
""" + DEFAULT_HH_MODULE = "HHModelPinv" + DEFAULT_HH_MODEL = "model_default" + valid_hh_model_options = { "noNNLOscaling", "noBRscaling", "noHscaling", "noklDependentUnc", "doProfilekl", "doProfilekt", "doProfileCV", "doProfileC2V", "doProfileC2", } hh_model = luigi.Parameter( - default="HHModelPinv.model_default", + default="{}.{}".format(DEFAULT_HH_MODULE, DEFAULT_HH_MODEL), description="the location of the HH model to use with optional configuration options in " - "the format module.model_name[@opt1[@opt2...]]; when no module name is given, the default " - "one 'dhi.models.HHModelPinv' is assumed; valid options are {}; default: " - "HHModelPinv.model_default".format(",".join(valid_hh_model_options)), + "the format [module.]model_name[@opt1[@opt2...]]; when no module is given, the default " + "'{0}' is assumed; valid options are {2}; default: {0}.{1}".format( + DEFAULT_HH_MODULE, DEFAULT_HH_MODEL, ",".join(valid_hh_model_options), + ), ) allow_empty_hh_model = False @@ -74,14 +78,13 @@ class HHModelTask(AnalysisTask): # the format used to be "module:model_name" before so adjust it to support legacy commands hh_model = hh_model.replace(":", ".") - # when there is no "." in the string, assume it to be the name of a model in the default - # model file "HHModelPinv" + # when there is no "." in the string, assume it to be the default module if "." not in hh_model: - hh_model = "HHModelPinv.{}".format(hh_model) + hh_model = "{}.{}".format(cls.DEFAULT_HH_MODULE, hh_model) # split into module, model name and options front, options = hh_model.split("@", 1) if "@" in hh_model else (hh_model, "") - module_id, model_name = front.rsplit(".", 1) if "." in front else ("HHModelPinv", front) + module_id, model_name = front.rsplit(".", 1) if "." 
in front else (cls.DEFAULT_HH_MODULE, front) options = options.split("@") if options else [] # check if options are valid and split them into key value pairs @@ -1071,8 +1074,7 @@ class POITask(DatacardTask, ParameterValuesTask): # store available pois on task level and potentially update them according to the model if self.hh_model_empty: - self.r_pois = tuple(self.__class__.r_pois) - self.k_pois = tuple(self.__class__.k_pois) + self.r_pois, self.k_pois = self.get_empty_hh_model_pois() else: model = self.load_hh_model()[1] self.r_pois = tuple(model.r_pois) @@ -1103,6 +1105,11 @@ class POITask(DatacardTask, ParameterValuesTask): raise Exception("{!r}: parameter values are not allowed to be in POIs, but " "found '{}'".format(self, p)) + def get_empty_hh_model_pois(self): + # hook that can be implemented to configure the r (and possibly k) POIs to be used + # when not hh model is configured + return tuple(self.__class__.r_pois), tuple(self.__class__.k_pois) + def store_parts(self): parts = super(POITask, self).store_parts() parts["poi"] = "poi_{}".format("_".join(self.pois)) @@ -1243,6 +1250,14 @@ class POIScanTask(POITask, ParameterScanTask): raise Exception("scan parameters are not allowed to be in parameter ranges, " "but found {}".format(p)) + def _joined_parameter_values_pois(self): + pois = super(POIScanTask, self)._joined_parameter_values_pois() + + # skip scan parameters + pois = [p for p in pois if p not in self.scan_parameter_names] + + return pois + def get_output_postfix_pois(self): use_all_pois = self.allow_parameter_values_in_pois or self.force_scan_parameters_equal_pois return self.all_pois if use_all_pois else self.other_pois @@ -1265,14 +1280,6 @@ class POIScanTask(POITask, ParameterScanTask): return self.join_postfix(parts) if join else parts - def _joined_parameter_values_pois(self): - pois = super(POIScanTask, self)._joined_parameter_values_pois() - - # skip scan parameters - pois = [p for p in pois if p not in self.scan_parameter_names] - - 
return pois - class POIPlotTask(PlotTask, POITask): @@ -1388,9 +1395,16 @@ class CombineCommandTask(CommandTask): @property def combine_optimization_args(self): + # start with minimizer args args = self.get_minimizer_args() + + # additional args + args += " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" + + # optimization for discrete parameters (as suggested by bbgg) if self.optimize_discretes: args += " " + self.combine_discrete_options + return args def get_command(self): @@ -1405,10 +1419,18 @@ class CombineCommandTask(CommandTask): def is_opt(s): return s.startswith("-") and not is_number(s[1:]) + # highlighting + cyan = lambda s: law.util.colored(s, "cyan") + cyan_bright = lambda s: law.util.colored(s, "cyan", style="bright") + bright = lambda s: law.util.colored(s, "default", style="bright") + # extract the "combine ..." part - cmds = [c.strip() for c in cmd.split(" && ")] - for i, c in enumerate(list(cmds)): + cmds = [] + highlighted_cmds = [] + for c in (c.strip() for c in cmd.split(" && ")): if not c.startswith("combine "): + cmds.append(c) + highlighted_cmds.append(cyan(c)) continue # first, replace aliases @@ -1448,9 +1470,16 @@ class CombineCommandTask(CommandTask): ] # build the full command again - cmds[i] = law.util.quote_cmd(["combine"] + leading_values + law.util.flatten(params)) + cmds.append(law.util.quote_cmd(["combine"] + leading_values + law.util.flatten(params))) + + # add the highlighted command + highlighted_cmd = [] + for i, p in enumerate(shlex.split(cmds[-1])): + func = cyan_bright if (i == 0 and p == "combine") or p.startswith("--") else cyan + highlighted_cmd.append(func(p)) + highlighted_cmds.append(" ".join(highlighted_cmd)) - return " && ".join(cmds) + return " && ".join(cmds), bright(" && ").join(highlighted_cmds) class InputDatacards(DatacardTask, law.ExternalTask): diff --git a/dhi/tasks/exclusion.py b/dhi/tasks/exclusion.py index 59a380d596f3a53f4db82bda5b7a996b0bf527fb..5ec3d928c66411ca38c0a473f0577c0ceb9b1f3e 100644 --- 
a/dhi/tasks/exclusion.py +++ b/dhi/tasks/exclusion.py @@ -11,12 +11,13 @@ import luigi from dhi.tasks.base import BoxPlotTask, view_output_plots from dhi.tasks.combine import MultiDatacardTask, POIScanTask, POIPlotTask +from dhi.tasks.snapshot import SnapshotUser from dhi.tasks.limits import MergeUpperLimits, PlotUpperLimits from dhi.tasks.likelihoods import MergeLikelihoodScan, PlotLikelihoodScan from dhi.config import br_hh -class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, BoxPlotTask): +class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, SnapshotUser, BoxPlotTask): show_best_fit = PlotLikelihoodScan.show_best_fit show_best_fit_error = PlotLikelihoodScan.show_best_fit_error @@ -55,6 +56,14 @@ class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, BoxPl return reqs + def get_output_postfix(self, join=True): + parts = super(PlotExclusionAndBestFit, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + def output(self): names = self.create_plot_names(["exclusionbestfit", self.get_output_postfix()]) return [self.local_target(name) for name in names] @@ -129,7 +138,7 @@ class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, BoxPl ) -class PlotExclusionAndBestFit2D(POIScanTask, POIPlotTask): +class PlotExclusionAndBestFit2D(POIScanTask, POIPlotTask, SnapshotUser): show_best_fit = PlotLikelihoodScan.show_best_fit show_best_fit_error = copy.copy(PlotLikelihoodScan.show_best_fit_error) @@ -190,6 +199,14 @@ class PlotExclusionAndBestFit2D(POIScanTask, POIPlotTask): return reqs + def get_output_postfix(self, join=True): + parts = super(PlotExclusionAndBestFit2D, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + def output(self): names = self.create_plot_names(["exclusionbestfit2d", 
self.get_output_postfix()]) return [self.local_target(name) for name in names] diff --git a/dhi/tasks/gof.py b/dhi/tasks/gof.py index e06b09b245b165d86cf3f94a81913f2854713029..7ddf9e18772436dc7cc9c4ae96fcdd47201dcb86 100644 --- a/dhi/tasks/gof.py +++ b/dhi/tasks/gof.py @@ -15,9 +15,10 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser -class GoodnessOfFitBase(POITask): +class GoodnessOfFitBase(POITask, SnapshotUser): toys = luigi.IntParameter( default=1, @@ -49,11 +50,19 @@ class GoodnessOfFitBase(POITask): parts["gof"] = self.algorithm return parts + def get_output_postfix(self, join=True): + parts = super(GoodnessOfFitBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + if self.frequentist_toys: + parts.append("freqtoys") + + return self.join_postfix(parts) if join else parts + @property def toys_postfix(self): - postfix = "t{}_pt{}".format(self.toys, self.toys_per_task) - if self.frequentist_toys: - postfix += "_freq" + return "t{}_pt{}".format(self.toys, self.toys_per_task) return postfix @@ -82,23 +91,36 @@ class GoodnessOfFit(GoodnessOfFitBase, CombineCommandTask, law.LocalWorkflow, HT def workflow_requires(self): reqs = super(GoodnessOfFit, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): + parts = [] if self.branch == 0: - postfix = "b0_data" + parts.append("b0_data") else: - postfix = "b{}_toy{}To{}".format(self.branch, self.branch_data[0], self.branch_data[-1]) - if self.frequentist_toys: - postfix += "_freq" - name = self.join_postfix(["gof", self.get_output_postfix(), postfix]) + 
parts.append("b{}_toy{}To{}".format(self.branch, self.branch_data[0], self.branch_data[-1])) + + name = self.join_postfix(["gof", self.get_output_postfix(), parts]) return self.local_target(name + ".root") def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" + # toy options toy_opts = "" if self.branch > 0: @@ -106,7 +128,8 @@ class GoodnessOfFit(GoodnessOfFitBase, CombineCommandTask, law.LocalWorkflow, HT if self.frequentist_toys: toy_opts += " --toysFrequentist" - return ( + # build the command + cmd = ( "combine -M GoodnessOfFit {workspace}" " {self.custom_args}" " --verbose 1" @@ -119,16 +142,20 @@ class GoodnessOfFit(GoodnessOfFitBase, CombineCommandTask, law.LocalWorkflow, HT " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.GoodnessOfFit.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, toy_opts=toy_opts, + snapshot_args=snapshot_args, ) + return cmd + def htcondor_output_postfix(self): postfix = super(GoodnessOfFit, self).htcondor_output_postfix() return "{}__{}".format(postfix, self.toys_postfix) @@ -257,10 +284,7 @@ class PlotMultipleGoodnessOfFits(PlotGoodnessOfFit, MultiDatacardTask, BoxPlotTa @property def toys_postfix(self): tpl_to_str = lambda tpl: "_".join(map(str, tpl)) - postfix = "t{}_pt{}".format(tpl_to_str(self.toys), tpl_to_str(self.toys_per_task)) - if self.frequentist_toys: - postfix += "_freq" - return postfix + return "t{}_pt{}".format(tpl_to_str(self.toys), tpl_to_str(self.toys_per_task)) def requires(self): return [ diff --git 
a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index d04564b7fecd94caf7d2736ed9056cb901dff18d..36d2aa0c74623d42d1a21d087ea893eaa8af15e2 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -5,9 +5,11 @@ Tasks related to likelihood scans. """ import copy +from operator import mul import law import luigi +import six from dhi.tasks.base import HTCondorWorkflow, view_output_plots from dhi.tasks.combine import ( @@ -18,15 +20,26 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) -from dhi.util import unique_recarray, extend_recarray, convert_dnll2 +from dhi.tasks.snapshot import Snapshot, SnapshotUser +from dhi.config import poi_data +from dhi.util import unique_recarray, extend_recarray, convert_dnll2, linspace -class LikelihoodBase(POIScanTask): +class LikelihoodBase(POIScanTask, SnapshotUser): pois = copy.copy(POIScanTask.pois) pois._default = ("kl",) force_scan_parameters_equal_pois = True + allow_parameter_values_in_pois = True + + def get_output_postfix(self, join=True): + parts = super(LikelihoodBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): @@ -34,68 +47,109 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo run_command_in_tmp = True def create_branch_map(self): - linspace = self.get_scan_linspace() - - # when blinded and the expected best fit values of r and k POIs (== 1) are not contained - # in the grid points, log an error as this leads to a scenario where the global minimum nll - # is not computed and the deltas to all other nll values become arbitrary and incomparable - if self.blinded: - for i, poi in enumerate(self.pois): - values = [tpl[i] for tpl in linspace] - if poi in self.all_pois and 1 not in values: - scan = "start: {}, stop: {}, points: 
{}".format(*self.scan_parameters[i][1:]) - self.logger.error("the expected best fit value of 1 is not contained in the " - "values to scan for POI {} ({}), leading to dnll values being computed " - "relative to an arbitrary minimum".format(poi, scan)) - - return linspace + return self.get_scan_linspace() def workflow_requires(self): reqs = super(LikelihoodScan, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): - return self.local_target("likelihood__" + self.get_output_postfix() + ".root") + name = self.join_postfix(["likelihood", self.get_output_postfix()]) + ".root" + return self.local_target(name) - @property - def blinded_args(self): + def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" + + # args for blinding / unblinding if self.unblinded: - return "--seed {self.branch}".format(self=self) + blinded_args = "--seed {self.branch}".format(self=self) else: - return "--seed {self.branch} --toys {self.toys}".format(self=self) + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) + + # ensure that ranges of scanned parameters contain their SM values (plus one step) + # in order for the likelihood scan to find the expected minimum and compute all deltaNLL + # values with respect that minimum; otherwise, results of different scans cannot be stitched + # together as they were potentially compared against different minima; this could be + # simplified by 
https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/pull/686 which + # would only require to set "--setParameterRangesForGrid {self.joined_scan_ranges}" + ext_ranges = [] + for p, ranges in self.scan_parameters_dict.items(): + # gather data + start, stop, points = ranges[0] + sm_value = poi_data.get(p, {}).get("sm_value", 1.) + step_size = (float(stop - start) / (points - 1)) if points > 1 else 1 + assert step_size > 0 + # decrease the starting point until the sm value is fully contained + while start >= sm_value: + start -= step_size + points += 1 + # increase the stopping point until the sm value is fully contained + while stop <= sm_value: + stop += step_size + points += 1 + # store the extended range + ext_ranges.append((start, stop, points)) + # compute the new n-D point space + ext_space = self._get_scan_linspace(ext_ranges) + # get the point index of the current branch + ext_point = ext_space.index(self.branch_data) + # recreate joined expressions for the combine command + ext_joined_scan_points = ",".join(map(str, (p for _, _, p in ext_ranges))) + ext_joined_scan_ranges = ":".join( + "{}={},{}".format(name, start, stop) + for name, (start, stop, _) in zip(self.scan_parameter_names, ext_ranges) + ) - def build_command(self): - return ( + # build the command + cmd = ( "combine -M MultiDimFit {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --algo grid" " --redefineSignalPOIs {self.joined_pois}" - " --gridPoints {self.joined_scan_points}" - " --firstPoint {self.branch}" - " --lastPoint {self.branch}" + " --gridPoints {ext_joined_scan_points}" + " --firstPoint {ext_point}" + " --lastPoint {ext_point}" " --alignEdges 1" - " --setParameterRanges {self.joined_scan_ranges}:{self.joined_parameter_ranges}" + " --setParameterRanges {ext_joined_scan_ranges}:{self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " 
--freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " --saveNLL" - " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, + blinded_args=blinded_args, + snapshot_args=snapshot_args, + ext_joined_scan_points=ext_joined_scan_points, + ext_joined_scan_ranges=ext_joined_scan_ranges, + ext_point=ext_point, ) + return cmd + class MergeLikelihoodScan(LikelihoodBase): @@ -103,7 +157,8 @@ class MergeLikelihoodScan(LikelihoodBase): return LikelihoodScan.req(self) def output(self): - return self.local_target("limits__" + self.get_output_postfix() + ".npz") + name = self.join_postfix(["likelihoods", self.get_output_postfix()]) + ".npz" + return self.local_target(name) @law.decorator.log @law.decorator.safe_output @@ -355,14 +410,14 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): paper=self.paper, ) - def load_scan_data(self, inputs, merge_scans=True, recompute_dnll2=True): + def load_scan_data(self, inputs, recompute_dnll2=True, merge_scans=True): return self._load_scan_data(inputs, self.scan_parameter_names, - self.get_scan_parameter_combinations(), merge_scans=merge_scans, - recompute_dnll2=recompute_dnll2) + self.get_scan_parameter_combinations(), recompute_dnll2=recompute_dnll2, + merge_scans=merge_scans) @classmethod def _load_scan_data(cls, inputs, scan_parameter_names, scan_parameter_combinations, - merge_scans=True, recompute_dnll2=True): + recompute_dnll2=True, merge_scans=True): import numpy as np # load values of each input @@ -376,18 +431,27 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): for i in range(len(scan_parameter_names)) ]) + # nll0 values must be identical per input (otherwise there might be an issue with the model) + for v in values: + nll_unique = np.unique(v["nll0"]) + nll_unique = 
nll_unique[~np.isnan(nll_unique)] + if len(nll_unique) != 1: + raise Exception("found {} different nll0 values in scan data which indicates in " + "issue with the model: {}".format(len(nll_unique), nll_unique)) + + # recompute dnll2 from the minimum nll and fit_nll + if recompute_dnll2: + # use the overall minimal nll as a common reference value when merging + min_nll = min(np.nanmin(v["nll"]) for v in values) + for v in values: + _min_nll = min_nll if merge_scans else np.nanmin(v["nll"]) + v["dnll2"] = 2 * (v["fit_nll"] - _min_nll) + # concatenate values and safely remove duplicates when configured if merge_scans: test_fn = lambda kept, removed: kept < 1e-7 or abs((kept - removed) / kept) < 0.001 values = unique_recarray(values, cols=scan_parameter_names, - test_metric=("fit_nll", test_fn)) - - # recompute dnll2 from the minimum nll and fit_nll - if recompute_dnll2: - _values = [values] if merge_scans else values - min_nll = min(np.nanmin(v["nll"]) for v in _values) - for v in _values: - v["dnll2"] = 2 * (v["fit_nll"] - min_nll) + test_metric=("dnll2", test_fn)) # pick the most appropriate poi mins poi_mins = cls._select_poi_mins(all_poi_mins, scan_parameter_combinations) @@ -396,22 +460,32 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): @classmethod def _select_poi_mins(cls, poi_mins, scan_parameter_combinations): - # pick the poi mins for the scan range that has the lowest step size around the mins - # the combined step size of multiple dims is simply defined by their sum - min_step_size = 1e5 - best_poi_mins = poi_mins[0] - for _poi_mins, scan_parameters in zip(poi_mins, scan_parameter_combinations): - if None in _poi_mins: - continue - # each min is required to be in the corresponding scan range - if not all((a <= v <= b) for v, (_, a, b, _) in zip(_poi_mins, scan_parameters)): - continue - # compute the merged step size - step_size = sum((b - a) / (n - 1.) 
for (_, a, b, n) in scan_parameters) - # store - if step_size < min_step_size: - min_step_size = step_size - best_poi_mins = _poi_mins + # pick the poi min corrsponding to the largest spanned region, i.e., range / area / volume + regions = [ + (i, six.moves.reduce(mul, [stop - start for _, start, stop, _ in scan_range])) + for i, scan_range in enumerate(scan_parameter_combinations) + ] + best_i = sorted(regions, key=lambda pair: -pair[1])[0][0] + best_poi_mins = poi_mins[best_i] + + # old algorithm: + # # pick the poi mins for the scan range that has the lowest step size around the mins + # # the combined step size of multiple dims is simply defined by their sum + # min_step_size = 1e5 + # best_poi_mins = poi_mins[0] + # for _poi_mins, scan_parameters in zip(poi_mins, scan_parameter_combinations): + # if None in _poi_mins: + # continue + # # each min is required to be in the corresponding scan range + # if not all((a <= v <= b) for v, (_, a, b, _) in zip(_poi_mins, scan_parameters)): + # continue + # # compute the merged step size + # step_size = sum((b - a) / (n - 1.) 
for (_, a, b, n) in scan_parameters) + # # store + # if step_size < min_step_size: + # min_step_size = step_size + # best_poi_mins = _poi_mins + return best_poi_mins diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index 86db70a9adb3e95b571ab82bb68850b6d6012358..c0b680143024836e9f0361235c1ac6f3665b63cb 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -18,14 +18,23 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.util import unique_recarray, real_path -from dhi.config import br_hh +from dhi.config import br_hh, poi_data -class UpperLimitsBase(POIScanTask): +class UpperLimitsBase(POIScanTask, SnapshotUser): force_scan_parameters_unequal_pois = True + def get_output_postfix(self, join=True): + parts = super(UpperLimitsBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): @@ -36,16 +45,30 @@ class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCond def workflow_requires(self): reqs = super(UpperLimits, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): - return self.local_target("limit__" + self.get_output_postfix() + ".root") + name = self.join_postfix(["limit", self.get_output_postfix()]) + ".root" + return self.local_target(name) def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " 
--snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" + # arguments for un/blinding if self.unblinded: blinded_args = "--seed {self.branch}".format(self=self) @@ -69,14 +92,16 @@ class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCond " --setParameters {self.joined_scan_values},{self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.AsymptoticLimits.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, blinded_args=blinded_args, + snapshot_args=snapshot_args, ) return cmd @@ -111,7 +136,8 @@ class MergeUpperLimits(UpperLimitsBase): return UpperLimits.req(self) def output(self): - return self.local_target("limits__" + self.get_output_postfix() + ".npz") + name = self.join_postfix(["limits", self.get_output_postfix()]) + ".npz" + return self.local_target(name) @law.decorator.log @law.decorator.safe_output @@ -570,7 +596,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): ) -class PlotUpperLimitsAtPoint(POIPlotTask, MultiDatacardTask, BoxPlotTask): +class PlotUpperLimitsAtPoint(POIPlotTask, SnapshotUser, MultiDatacardTask, BoxPlotTask): xsec = PlotUpperLimits.xsec br = PlotUpperLimits.br @@ -676,12 +702,14 @@ class PlotUpperLimitsAtPoint(POIPlotTask, MultiDatacardTask, BoxPlotTask): return self._external_limit_values def requires(self): - scan_parameter_value = self.parameter_values_dict.get(self.pseudo_scan_parameter, 1.0) - scan_parameter = (self.pseudo_scan_parameter, scan_parameter_value, scan_parameter_value, 1) + default = poi_data.get(self.pseudo_scan_parameter, {}).get("sm_value", 1.0) + value = self.parameter_values_dict.get(self.pseudo_scan_parameter, default) + scan_parameter = 
(self.pseudo_scan_parameter, value, value, 1) parameter_values = tuple( - pv for pv in self.parameter_values - if not pv.startswith(self.pseudo_scan_parameter + "=") + (p, v) for p, v in self.parameter_values_dict.items() + if p != self.pseudo_scan_parameter ) + return [ UpperLimits.req( self, @@ -692,6 +720,14 @@ class PlotUpperLimitsAtPoint(POIPlotTask, MultiDatacardTask, BoxPlotTask): for datacards in self.multi_datacards ] + def get_output_postfix(self, join=True): + parts = super(PlotUpperLimitsAtPoint, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + def output(self): # additional postfix parts = [] diff --git a/dhi/tasks/postfit.py b/dhi/tasks/postfit.py index 00e9d4222a4f611c1bb77a63cdbc7b3fd121f723..f7a4f9b6957d89503cdc204f924e7cce1c9cd8c1 100644 --- a/dhi/tasks/postfit.py +++ b/dhi/tasks/postfit.py @@ -12,6 +12,8 @@ import luigi from dhi.tasks.base import HTCondorWorkflow, view_output_plots from dhi.tasks.combine import CombineCommandTask, POITask, POIPlotTask, CreateWorkspace +from dhi.tasks.snapshot import Snapshot, SnapshotUser +from dhi.tasks.pulls_impacts import PlotPullsAndImpacts class SAVEFLAGS(str, enum.Enum): @@ -29,7 +31,7 @@ class SAVEFLAGS(str, enum.Enum): return list(map(lambda x: x.value, cls)) -class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): +class FitDiagnostics(POITask, CombineCommandTask, SnapshotUser, law.LocalWorkflow, HTCondorWorkflow): pois = law.CSVParameter( default=("r",), @@ -56,38 +58,56 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor run_command_in_tmp = True def create_branch_map(self): - return [""] # single branch with empty data + # single branch with empty data + return [None] def workflow_requires(self): reqs = super(FitDiagnostics, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = 
CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs + + def get_output_postfix(self, join=True): + parts = super(FitDiagnostics, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts def output(self): - parts = [self.get_output_postfix()] + parts = [] if not self.skip_b_only: parts.append("withBOnly") if self.skip_save: - parts.append(map("no{}".format, sorted(self.skip_save))) - postfix = self.join_postfix(parts) + parts.append(map("not{}".format, sorted(self.skip_save))) + name = lambda prefix: self.join_postfix([prefix, self.get_output_postfix(), parts]) return { - "result": self.local_target("result__{}.root".format(postfix)), - "diagnostics": self.local_target("fitdiagnostics__{}.root".format(postfix)), + "result": self.local_target(name("result") + ".root"), + "diagnostics": self.local_target(name("fitdiagnostics") + ".root"), } - @property - def blinded_args(self): - if self.unblinded: - return "" + def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" else: - return "--toys {self.toys}".format(self=self) + workspace = self.input()["workspace"].path + snapshot_args = "" - def build_command(self): - outputs = self.output() + # arguments for un/blinding + blinded_args = "" + if self.blinded: + blinded_args = "--toys {self.toys}".format(self=self) # prepare optional flags flags = [] @@ -97,18 +117,20 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor if save_flag not in self.skip_save: flags.append("--save{}".format(save_flag)) + outputs = self.output() 
return ( "combine -M FitDiagnostics {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --redefineSignalPOIs {self.joined_pois}" " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" " {flags}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.FitDiagnostics.mH{self.mass_int}{postfix}.root {output_result}" @@ -116,15 +138,28 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor "mv fitDiagnosticsTest.root {output_diagnostics}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, postfix="" if SAVEFLAGS.Toys in self.skip_save else ".123456", output_result=outputs["result"].path, output_diagnostics=outputs["diagnostics"].path, + blinded_args=blinded_args, + snapshot_args=snapshot_args, flags=" ".join(flags), ) -class PlotPostfitSOverB(POIPlotTask): +class PostfitPlotBase(POIPlotTask, SnapshotUser): + + def get_output_postfix(self, join=True): + parts = super(PostfitPlotBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + + +class PlotPostfitSOverB(PostfitPlotBase): pois = FitDiagnostics.pois bins = law.CSVParameter( @@ -245,7 +280,7 @@ class PlotPostfitSOverB(POIPlotTask): ) -class PlotNuisanceLikelihoodScans(POIPlotTask): +class PlotNuisanceLikelihoodScans(PostfitPlotBase): x_min = copy.copy(POIPlotTask.x_min) x_max = copy.copy(POIPlotTask.x_max) @@ -279,6 +314,12 @@ class PlotNuisanceLikelihoodScans(POIPlotTask): description="when True, sort parameters by their hightest likelihood change in the scan " "range; mostly useful when the number of parameters per page is > 1; default: False", ) + show_diff = luigi.BoolParameter( + default=False, + 
description="when True, the x-axis shows differences of nuisance parameters with respect " + "to the best fit value instead of absolute values; default: False", + ) + labels = PlotPullsAndImpacts.labels mc_stats_patterns = ["*prop_bin*"] @@ -289,10 +330,17 @@ class PlotNuisanceLikelihoodScans(POIPlotTask): force_n_pois = 1 def requires(self): - return FitDiagnostics.req(self, skip_save=("WithUncertainties",)) + # normally, we would require FitDiagnostics without saved uncertainties no matter what, + # but since it could be already complete, use it when it does exist + full_fd = FitDiagnostics.req(self) + if full_fd.complete(): + return full_fd + return FitDiagnostics.req(self, skip_save=("WithUncertainties",), _prefer_cli=["skip_save"]) def output(self): parts = ["nlls", "{}To{}".format(self.x_min, self.x_max), self.get_output_postfix()] + if self.show_diff: + parts.append("diffs") if self.y_log: parts.append("log") if self.sort_max: @@ -341,6 +389,8 @@ class PlotNuisanceLikelihoodScans(POIPlotTask): skip_parameters=skip_parameters, parameters_per_page=self.parameters_per_page, sort_max=self.sort_max, + show_diff=self.show_diff, + labels=None if self.labels == law.NO_STR else self.labels, x_min=self.x_min, x_max=self.x_max, y_min=self.get_axis_limit("y_min"), diff --git a/dhi/tasks/pulls_impacts.py b/dhi/tasks/pulls_impacts.py index e61555ff50367502fadae7f08bbf15c1cf311aa6..2186f20e6d4154d146841565644a86ba9656405d 100644 --- a/dhi/tasks/pulls_impacts.py +++ b/dhi/tasks/pulls_impacts.py @@ -12,10 +12,11 @@ import numpy as np from dhi.tasks.base import HTCondorWorkflow, BoxPlotTask, view_output_plots from dhi.tasks.combine import CombineCommandTask, POITask, POIPlotTask, CreateWorkspace +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.datacard_tools import get_workspace_parameters -class PullsAndImpactsBase(POITask): +class PullsAndImpactsBase(POITask, SnapshotUser): only_parameters = law.CSVParameter( default=(), @@ -31,17 +32,20 @@ class 
PullsAndImpactsBase(POITask): description="when True, calculate pulls and impacts for MC stats nuisances as well; " "default: False", ) - use_snapshot = luigi.BoolParameter( - default=False, - description="when True, run the initial fit first and use it as a snapshot for nuisance " - "fits; default: False", - ) mc_stats_patterns = ["*prop_bin*"] force_n_pois = 1 allow_parameter_values_in_pois = True + def get_output_postfix(self, join=True): + parts = super(PullsAndImpactsBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): @@ -50,13 +54,6 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow def __init__(self, *args, **kwargs): super(PullsAndImpacts, self).__init__(*args, **kwargs) - # encourage using snapshots when running unblinded - if self.unblinded and not self.use_snapshot and self.is_workflow() and self.branches != (0,): - self.logger.info_once("unblinded_no_snapshot", "you are running unblinded pulls and " - "impacts without using the initial fit as a snapshot for nuisance fits; you might " - "consider to do so by adding '--use-snapshot' to your command as this can lead to " - "more stable results and faster fits") - self._cache_branches = False def create_branch_map(self): @@ -100,51 +97,41 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow def workflow_requires(self): reqs = super(PullsAndImpacts, self).workflow_requires() reqs["workspace"] = CreateWorkspace.req(self) - if self.use_snapshot and set(self.branch_map) != {0}: - reqs["snapshot"] = self.req(self, branches=[0]) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): reqs = {"workspace": CreateWorkspace.req(self)} - if self.branch > 0 and self.use_snapshot: - reqs["snapshot"] = self.req(self, 
branch=0) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) return reqs def output(self): parts = [self.branch_data] - if self.branch > 0 and self.use_snapshot: - parts.append("fromsnapshot") - name = lambda s: self.join_postfix([s, self.get_output_postfix()] + parts) + ".root" - - outputs = {"fit": self.local_target(name("fit"))} - if self.branch == 0: - outputs["fitresult"] = self.local_target(name("fitresult"), optional=True) - - return outputs - - @property - def blinded_args(self): - if self.unblinded: - return "--seed {self.branch}".format(self=self) - else: - return "--seed {self.branch} --toys {self.toys}".format(self=self) + name = self.join_postfix(["fit", self.get_output_postfix(), parts]) + ".root" + return self.local_target(name) def build_command(self): - # check if a snapshot is used - use_snapshot = self.use_snapshot and self.branch > 0 - - # get the workspace to use - if use_snapshot: - workspace = self.input()["snapshot"]["fit"].path + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" else: workspace = self.input()["workspace"].path + snapshot_args = "" + + # args for blinding / unblinding + if self.unblinded: + blinded_args = "--seed {self.branch}".format(self=self) + else: + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) # define branch dependent options if self.branch == 0: # initial fit branch_opts = ( " --algo singles" - " --saveWorkspace" " --saveFitResult" ) else: @@ -155,17 +142,14 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow " --floatOtherPOIs 1" " --saveInactivePOI 1" ).format(self.branch_data) - if use_snapshot: - branch_opts += " --snapshotName MultiDimFit" # define the basic command - outputs = self.output() cmd = ( "combine -M MultiDimFit {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " 
{self.blinded_args}" + " {blinded_args}" " --redefineSignalPOIs {self.pois[0]}" " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" @@ -173,6 +157,7 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow " --freezeNuisanceGroups {self.joined_frozen_groups}" " --saveNLL" " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" + " {snapshot_args}" " {self.combine_optimization_args}" " {branch_opts}" " && " @@ -180,14 +165,12 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow ).format( self=self, workspace=workspace, - output=outputs["fit"].path, + output=self.output().path, + snapshot_args=snapshot_args, + blinded_args=blinded_args, branch_opts=branch_opts, ) - # for the initial fit, also move the fitresult - if self.branch == 0: - cmd += " && mv multidimfitTest.root {}".format(outputs["fitresult"].path) - return cmd def htcondor_output_postfix(self): @@ -218,8 +201,6 @@ class MergePullsAndImpacts(PullsAndImpactsBase): parts = [] if self.mc_stats: parts.append("mcstats") - if self.use_snapshot: - parts.append("fromsnapshot") if self.only_parameters: parts.append("only_" + law.util.create_hash(sorted(self.only_parameters))) if self.skip_parameters: @@ -241,7 +222,7 @@ class MergePullsAndImpacts(PullsAndImpactsBase): fit_results = {} fail_info = [] for b, name in branch_map.items(): - inp = inputs[b]["fit"] + inp = inputs[b] if not inp.exists(): self.logger.warning("input of branch {} at {} does not exist".format(b, inp.path)) continue @@ -416,8 +397,6 @@ class PlotPullsAndImpacts(PullsAndImpactsBase, POIPlotTask, BoxPlotTask): parts = [] if self.mc_stats: parts.append("mcstats") - if self.use_snapshot: - parts.append("fromsnapshot") if self.only_parameters: parts.append("only_" + law.util.create_hash(sorted(self.only_parameters))) if self.skip_parameters: diff --git a/dhi/tasks/significances.py b/dhi/tasks/significances.py index 
5d42c24416a4ad404c869b55d9cd00d63d56d45e..909676574c8a37be8f750f3eb052a639ed11d443 100644 --- a/dhi/tasks/significances.py +++ b/dhi/tasks/significances.py @@ -15,10 +15,11 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.util import unique_recarray -class SignificanceBase(POIScanTask): +class SignificanceBase(POIScanTask, SnapshotUser): force_scan_parameters_unequal_pois = True allow_parameter_values_in_pois = True @@ -43,6 +44,8 @@ class SignificanceBase(POIScanTask): if not self.unblinded and self.frequentist_toys: parts.insert(0, ["postfit"]) + if self.use_snapshot: + parts.append("fromsnapshot") return self.join_postfix(parts) if join else parts @@ -56,45 +59,64 @@ class SignificanceScan(SignificanceBase, CombineCommandTask, law.LocalWorkflow, def workflow_requires(self): reqs = super(SignificanceScan, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): - return self.local_target("significance__" + self.get_output_postfix() + ".root") + name = self.join_postfix(["significance", self.get_output_postfix()]) + ".root" + return self.local_target(name) + + def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" - @property - def blinded_args(self): + # arguments for un/blinding if self.unblinded: - return "--seed {self.branch}".format(self=self) + blinded_args = "--seed {self.branch}".format(self=self) elif 
self.frequentist_toys: - return "--seed {self.branch} --toys {self.toys} --toysFreq".format(self=self) + blinded_args = "--seed {self.branch} --toys {self.toys} --toysFreq".format(self=self) else: - return "--seed {self.branch} --toys {self.toys}".format(self=self) + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) - def build_command(self): - return ( + # build the command + cmd = ( "combine -M Significance {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --redefineSignalPOIs {self.joined_pois}" " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_scan_values},{self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.Significance.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, + blinded_args=blinded_args, + snapshot_args=snapshot_args, ) + return cmd + class MergeSignificanceScan(SignificanceBase): @@ -102,7 +124,8 @@ class MergeSignificanceScan(SignificanceBase): return SignificanceScan.req(self) def output(self): - return self.local_target("significance__{}.npz".format(self.get_output_postfix())) + name = self.join_postfix(["significance", self.get_output_postfix()]) + ".npz" + return self.local_target(name) @law.decorator.log def run(self): diff --git a/dhi/tasks/snapshot.py b/dhi/tasks/snapshot.py new file mode 100644 index 0000000000000000000000000000000000000000..1f578c0927be34d24142ceb76fd2fa80153b8390 --- /dev/null +++ b/dhi/tasks/snapshot.py @@ -0,0 +1,103 @@ +# coding: utf-8 + +""" +Tasks related to defining and using snapshots. 
+""" + +import copy + +import law +import luigi +import six + +from dhi.tasks.base import HTCondorWorkflow +from dhi.tasks.combine import ( + CombineCommandTask, + POITask, + CreateWorkspace, +) + + +class Snapshot(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): + + pois = copy.copy(POITask.pois) + pois._default = ("r",) + + force_scan_parameters_equal_pois = True + allow_parameter_values_in_pois = True + run_command_in_tmp = True + + @classmethod + def req_params(cls, *args, **kwargs): + prefer_cli = kwargs.get("_prefer_cli", None) + if isinstance(prefer_cli, six.string_types): + prefer_cli = {prefer_cli} + else: + prefer_cli = {prefer_cli} if prefer_cli else set() + + # prefer all kinds of parameters from the command line + prefer_cli |= { + "toys", "frozen_parameters", "frozen_groups", "minimizer", "parameter_values", + "parameter_ranges", "workflow", "max_runtime", + } + + kwargs["_prefer_cli"] = prefer_cli + return super(Snapshot, cls).req_params(*args, **kwargs) + + def create_branch_map(self): + # single branch that does not need special data + return [None] + + def workflow_requires(self): + reqs = super(Snapshot, self).workflow_requires() + reqs["workspace"] = self.requires_from_branch() + return reqs + + def requires(self): + return CreateWorkspace.req(self) + + def output(self): + name = self.join_postfix(["snapshot", self.get_output_postfix()]) + ".root" + return self.local_target(name) + + def build_command(self): + # args for blinding / unblinding + if self.unblinded: + blinded_args = "--seed {self.branch}".format(self=self) + else: + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) + + # build the command + cmd = ( + "combine -M MultiDimFit {workspace}" + " {self.custom_args}" + " --verbose 1" + " --mass {self.mass}" + " {blinded_args}" + " --redefineSignalPOIs {self.joined_pois}" + " --setParameters {self.joined_parameter_values}" + " --setParameterRanges {self.joined_parameter_ranges}" + " 
--freezeParameters {self.joined_frozen_parameters}" + " --freezeNuisanceGroups {self.joined_frozen_groups}" + " --saveWorkspace" + " --saveNLL" + " {self.combine_optimization_args}" + " && " + "mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}" + ).format( + self=self, + workspace=self.input().path, + output=self.output().path, + blinded_args=blinded_args, + ) + + return cmd + + +class SnapshotUser(object): + + use_snapshot = luigi.BoolParameter( + default=False, + description="when set, perform a MultiDimFit first and use its workspace as a snapshot; " + "default: False", + ) diff --git a/docs/content/datacard_manipulation.md b/docs/content/datacard_manipulation.md index b5bf76ac5ecbf0ae22bcfe52492262d32a6ea17f..69f60ee172cab6fdb996f47108bde8a165627ab3 100644 --- a/docs/content/datacard_manipulation.md +++ b/docs/content/datacard_manipulation.md @@ -791,6 +791,7 @@ optional arguments: > plot_datacard_shapes.py --help usage: plot_datacard_shapes.py [-h] [--stack] [--directory DIRECTORY] + [--file-type FILE_TYPE] [--nom-format NOM_FORMAT] [--syst-format SYST_FORMAT] [--mass MASS] [--binning {original,numbers,numbers_width}] @@ -848,12 +849,14 @@ optional arguments: --directory DIRECTORY, -d DIRECTORY directory in which produced plots are saved; defaults to the current directory + --file-type FILE_TYPE, -f FILE_TYPE + the file type to produce; default: pdf --nom-format NOM_FORMAT format for created files when creating only nominal - shapes; default: {bin}__{process}.pdf + shapes; default: {bin}__{process}.{file_type} --syst-format SYST_FORMAT format for created files when creating systematic - shapes; default: {bin}__{process}__{syst}.pdf + shapes; default: {bin}__{process}__{syst}.{file_type} --mass MASS, -m MASS mass hypothesis; default: 125 --binning {original,numbers,numbers_width}, -b {original,numbers,numbers_width} the binning strategy; 'original': use original bin diff --git a/docs/content/introduction.md 
b/docs/content/introduction.md index b84fdaa16155f1aaa039fe1d98a16e8fc00df820..18cacf94480f724bc196eb35b192ec7ff7f6360b 100644 --- a/docs/content/introduction.md +++ b/docs/content/introduction.md @@ -95,6 +95,9 @@ module 'dhi.tasks.base', 3 task(s): - BundleRepo - BundleSoftware +module 'dhi.tasks.snapshot', 1 task(s): + - Snapshot + module 'dhi.tasks.limits', 7 task(s): - UpperLimits - PlotUpperLimitsAtPoint @@ -151,7 +154,7 @@ module 'dhi.tasks.exclusion', 2 task(s): - PlotExclusionAndBestFit - PlotExclusionAndBestFit2D -written 44 task(s) to index file '/your/path/inference/.law/index' +written 45 task(s) to index file '/your/path/inference/.law/index' ``` You can type diff --git a/docs/content/snippets/eftupperlimits_param_tab.md b/docs/content/snippets/eftupperlimits_param_tab.md deleted file mode 100644 index 22a6868639c81dcf934da849d58dca8b26ca92d8..0000000000000000000000000000000000000000 --- a/docs/content/snippets/eftupperlimits_param_tab.md +++ /dev/null @@ -1,7 +0,0 @@ -The `EFTBenchmarkLimits` computes the limits of each benchmark datacard. - -
- ---8<-- "content/snippets/parameters.md@-2,21,61,48,34,17,18,47,60" - -
diff --git a/docs/content/snippets/fitdiagnostics_param_tab.md b/docs/content/snippets/fitdiagnostics_param_tab.md index cb856a445c94f112c54371fa04b52e6d0c10f036..1fe1d1e9102046c70dfc0d41b2cfcb013b679f7f 100644 --- a/docs/content/snippets/fitdiagnostics_param_tab.md +++ b/docs/content/snippets/fitdiagnostics_param_tab.md @@ -2,6 +2,6 @@ The `PostFitShapes` task takes the workspace created by `CreateWorkspace` and ru
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,47,34,57,58" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,47,34,57,58"
diff --git a/docs/content/snippets/goodnessoffit_param_tab.md b/docs/content/snippets/goodnessoffit_param_tab.md index 38b5c3a201374ab3790a0f2276dab3fa8bd029ea..c6a5ef16ab7a84971a66edcaad209b6547debe88 100644 --- a/docs/content/snippets/goodnessoffit_param_tab.md +++ b/docs/content/snippets/goodnessoffit_param_tab.md @@ -2,6 +2,6 @@ The `GoodnessOfFit` task performs the tests for either data (`--branch 0`) and m
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53,34,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53,34,13,36"
diff --git a/docs/content/snippets/likelihoodscan_param_tab.md b/docs/content/snippets/likelihoodscan_param_tab.md index eeecedac78425e640c99c33588b6ee1d01126f0e..2ce7ee59f5d336fc0efdd363a74482bb96a3cc47 100644 --- a/docs/content/snippets/likelihoodscan_param_tab.md +++ b/docs/content/snippets/likelihoodscan_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio
---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,47,34,35,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74,47,34,35,13,36"
diff --git a/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md b/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md index 66d438d32d93de4a7af7209d03e6dcb57278523c..17db11a837de75af67194dc7aec1bfb3e4094c5f 100644 --- a/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md +++ b/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md @@ -1,4 +1,4 @@ -The `MergeEFTUpperLimits` task collects the limits from each of the `EFTUpperLimits` tasks and merges them into a single file. +The `MergeEFTBenchmarkLimits` task collects the limits from each of the `EFTBenchmarkLimits` tasks and merges them into a single file.
diff --git a/docs/content/snippets/mergeeftupperlimits_param_tab.md b/docs/content/snippets/mergeeftupperlimits_param_tab.md deleted file mode 100644 index 2c3761ffb6658b6a4a7ae8cb8ac3d9ea08890172..0000000000000000000000000000000000000000 --- a/docs/content/snippets/mergeeftupperlimits_param_tab.md +++ /dev/null @@ -1,7 +0,0 @@ -The `MergeEFTUpperLimits` task collects the limits from each of the `EFTUpperLimits` tasks and merges them into a single file. - -
- ---8<-- "content/snippets/parameters.md@-2,21,61,48,34,17,18,47,60" - -
diff --git a/docs/content/snippets/mergegoodnessoffit_param_tab.md b/docs/content/snippets/mergegoodnessoffit_param_tab.md index 545796ceaba39764577967f7c26d6d1d2f0368c7..c2b7c6e6113d90c3376c22a43ca0da28dde99cd5 100644 --- a/docs/content/snippets/mergegoodnessoffit_param_tab.md +++ b/docs/content/snippets/mergegoodnessoffit_param_tab.md @@ -2,6 +2,6 @@ The `MergeGoodnessOfFit` task collects the test statistic results from each of t
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53"
diff --git a/docs/content/snippets/mergelikelihoodscan_param_tab.md b/docs/content/snippets/mergelikelihoodscan_param_tab.md index 502a425f0764c4d6fa1fb12757e0751475b8cd4d..59c25ec63af019608e8af0383f0bd9f3fd5211e3 100644 --- a/docs/content/snippets/mergelikelihoodscan_param_tab.md +++ b/docs/content/snippets/mergelikelihoodscan_param_tab.md @@ -2,6 +2,6 @@ The `MergeLikelihoodScan` task collects the fit results from each of the `Likeli
---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74"
diff --git a/docs/content/snippets/mergepullsandimpacts_param_tab.md b/docs/content/snippets/mergepullsandimpacts_param_tab.md index 669ab647bcf2a542bcef5374a0186ea37377c0c2..f28c45c5d874c743c5c0d1abdb93c6bbc7e6ffd9 100644 --- a/docs/content/snippets/mergepullsandimpacts_param_tab.md +++ b/docs/content/snippets/mergepullsandimpacts_param_tab.md @@ -2,6 +2,6 @@ The `MergePullsAndImpacts` task collects the fit results from the `PullsAndImpac
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,56,28,33,67" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,74,56,28,33,67"
diff --git a/docs/content/snippets/mergesignificancescan_param_tab.md b/docs/content/snippets/mergesignificancescan_param_tab.md index 53e00a3fc87c9045bbfe4d4f2ba56d224e4b0ca5..e25d34d75c48c61ac84ce7c3294b2c9f5c6e4adc 100644 --- a/docs/content/snippets/mergesignificancescan_param_tab.md +++ b/docs/content/snippets/mergesignificancescan_param_tab.md @@ -2,6 +2,6 @@ The `MergeSignificanceScan` task collects the fit results from each of the `Sign
---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,53,48,17,18" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,53,48,17,18,74"
diff --git a/docs/content/snippets/mergeupperlimits_param_tab.md b/docs/content/snippets/mergeupperlimits_param_tab.md index 161709d2bbbb9e6c55229b59eb40cef984e9c037..75b118055eeacd99efffba614078a0159ec8f8d5 100644 --- a/docs/content/snippets/mergeupperlimits_param_tab.md +++ b/docs/content/snippets/mergeupperlimits_param_tab.md @@ -2,6 +2,6 @@ The `MergeUpperLimits` task collects the fit results from each of the `UpperLimi
---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74"
diff --git a/docs/content/snippets/parameters.md b/docs/content/snippets/parameters.md index 91a75567e9ab113856fa3480f36ad42a9d81eb45..18cb41b630595c907f01dfeea51877a349cb60b0 100644 --- a/docs/content/snippets/parameters.md +++ b/docs/content/snippets/parameters.md @@ -71,4 +71,4 @@ | `--prefit` | Draw prefit histograms and uncertainties instead of postfit ones. Defaults to `False`. | | `--show-best-fit-error` | When `True`, the uncertainty on the POIs best fit value is shown as well. Defaults to `True`. | | `--recompute-best-fit` | When `True`, the best fit value is recomputed from `scipy.interpolate` and `scipy.minimize`. Defaults to `False`. | -| `--use-snapshot` | When `True`, uses the initial fit as snapshot for nuisance fits. Recommended when running unblinded. Defaults to `False`. | +| `--use-snapshot` | When `True`, performs a [MultiDimFit snapshot](snapshot.md) first and uses it instead of the initial workspace. When running unblinded, consider using this option. Defaults to `False`. | diff --git a/docs/content/snippets/ploteftupperlimits_param_tab.md b/docs/content/snippets/ploteftupperlimits_param_tab.md deleted file mode 100644 index bb32a49f38c93092d66ba10daeea51bafb33cfa2..0000000000000000000000000000000000000000 --- a/docs/content/snippets/ploteftupperlimits_param_tab.md +++ /dev/null @@ -1,7 +0,0 @@ -The `PlotEFTUpperLimits` task collects the results from the `MergeEFTUpperLimits` task and visualizes them in a plot. - -
- ---8<-- "content/snippets/parameters.md@-2,21,61,48,17,18,10,11,3,4,7,8,9,60" - -
diff --git a/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md b/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md index cdf6e57bed71a24536e06dd0d729a9dfd082f733..ed848faebbf977d61857b0634c957af94f543773 100644 --- a/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md +++ b/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md @@ -2,6 +2,6 @@ The `PlotExclusionAndBestFit2D` task collects the fit results from the `MergeUpp
---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,55,72,73,48,17,18,12,39,40,11,3,4,5,6,7,8" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,55,72,73,48,17,18,74,12,39,40,11,3,4,5,6,7,8"
diff --git a/docs/content/snippets/plotexclusionandbestfit_param_tab.md b/docs/content/snippets/plotexclusionandbestfit_param_tab.md index 556277f83bf2ce2d9a28850b06da85145b61e34a..d666df3ae527685b198021a4927e281e99ff3b8f 100644 --- a/docs/content/snippets/plotexclusionandbestfit_param_tab.md +++ b/docs/content/snippets/plotexclusionandbestfit_param_tab.md @@ -2,6 +2,6 @@ The `PlotExclusionAndBestFit` task collects the fit results from the `MergeUpper
---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,55,72,73,48,17,18,3,4,22,23,12,5,6" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,55,72,73,48,17,18,74,3,4,22,23,12,5,6"
diff --git a/docs/content/snippets/plotgoodnessoffit_param_tab.md b/docs/content/snippets/plotgoodnessoffit_param_tab.md index 64fbb27b8754496a100b7cbe8721723fe960dc0a..9cc94e54063f4be0f89f6878d42c0f8aaa26be5b 100644 --- a/docs/content/snippets/plotgoodnessoffit_param_tab.md +++ b/docs/content/snippets/plotgoodnessoffit_param_tab.md @@ -2,6 +2,6 @@ The `PlotGoodnessOfFit` task collects the results from `MergeGoodnessOfFit` and
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53,3,4,5,6,7,8" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53,3,4,5,6,7,8"
diff --git a/docs/content/snippets/plotlikelihoodscan_param_tab.md b/docs/content/snippets/plotlikelihoodscan_param_tab.md index e2d4f51b7040b587de865a519da43db7f8ba9a3a..afaec455faca5970d3dfce058313dd649bfc91af 100644 --- a/docs/content/snippets/plotlikelihoodscan_param_tab.md +++ b/docs/content/snippets/plotlikelihoodscan_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate the visualisation.
---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,3,4,63,55,72,73,62,5,6,7,8,9,24,25" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,3,4,63,55,72,73,62,5,6,7,8,9,24,25"
diff --git a/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md b/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md index 95ede59d3e985cb478e79fba8bada1344a5ee709..1d5c4b2594c8779a4dc53ffc9b14ef37f5336364 100644 --- a/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md +++ b/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleGoodnessOfFits` task collects results from multiple `MergeGoodn
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53,3,4,22,23,5,6,7,8" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53,3,4,22,23,5,6,7,8"
diff --git a/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md b/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md index 3a1270483f963c19fa2972abfc8f8168e9b82af2..f8c9e623a74e6886a098fca8d017bccc06344740 100644 --- a/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md +++ b/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleLikelihoodScans` task collects the fit results from multiple `M
---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,3,4,22,23,55,73,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,74,3,4,22,23,55,73,5,6,7,8,9"
diff --git a/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md b/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md index 05dba9c7dbdc492a3a519e7f15810b8d4e22cf93..40d53fff967b2774db74eba4b2ddc01a1132fc0d 100644 --- a/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md +++ b/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleLikelihoodScansByModel` task collects the fit results from mult
---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,3,4,45,46,55,73,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,74,3,4,45,46,55,73,5,6,7,8,9"
diff --git a/docs/content/snippets/plotmultiplesignificancescans_param_tab.md b/docs/content/snippets/plotmultiplesignificancescans_param_tab.md index be53d3f431c34c4129e9e02c48d835d27f3f5f1c..fa06cde0718ce53278fa888983e9e6dc8fb5cca4 100644 --- a/docs/content/snippets/plotmultiplesignificancescans_param_tab.md +++ b/docs/content/snippets/plotmultiplesignificancescans_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleSignificanceScans` task collects the fit results from multiple
---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,53,48,17,18,3,4,64,22,23,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,53,48,17,18,74,3,4,64,22,23,5,6,7,8,9"
diff --git a/docs/content/snippets/plotmultipleupperlimits_param_tab.md b/docs/content/snippets/plotmultipleupperlimits_param_tab.md index 9f98ccd85db224c1fd21582b68e86fe093cde784..26c2622165c46edb50bdf98f861174671b2473f6 100644 --- a/docs/content/snippets/plotmultipleupperlimits_param_tab.md +++ b/docs/content/snippets/plotmultipleupperlimits_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleUpperLimits` task collects the fit results from multiple `Merge
---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,10,11,3,4,22,23,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,74,10,11,3,4,22,23,5,6,7,8,9"
diff --git a/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md b/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md index 79e6e0ec1c99e0b2a97fb514f609b2a43925e2e6..ac1132981a336eb5070bad5bb88a392c3687c058 100644 --- a/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md +++ b/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md @@ -2,6 +2,6 @@ The `PlotNuisanceLikelihoodScans` task collects results from the `FitDiagnostics
---8<-- "content/snippets/parameters.md@-2,20,19,16,48,14,66,12,17,18,56,28,27,59,3,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,48,14,66,12,17,18,74,56,28,27,59,3,5,6,7,8,9"
diff --git a/docs/content/snippets/plotpostfitsoverb_param_tab.md b/docs/content/snippets/plotpostfitsoverb_param_tab.md index 13b5ea57fca0e26d522c8d20bc911e881b01763d..431abf3e3aa1475da2077fddefe8726ed2c65417 100644 --- a/docs/content/snippets/plotpostfitsoverb_param_tab.md +++ b/docs/content/snippets/plotpostfitsoverb_param_tab.md @@ -2,6 +2,6 @@ The `PlotPostfitSOverB` task reads the fit diagnostics result from the `PostFitS
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,12,48,17,18,43,65,3,4,71,69,44,70,6,7,41,42" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,12,48,17,18,74,43,65,3,4,71,69,44,70,6,7,41,42"
diff --git a/docs/content/snippets/plotpullsandimpacts_param_tab.md b/docs/content/snippets/plotpullsandimpacts_param_tab.md index 2ef2e2bbc90f0a2fd5c4190de94969ccd6168488..49a81eeb0d03ed1b538a46ef5a9b2f4441222f62 100644 --- a/docs/content/snippets/plotpullsandimpacts_param_tab.md +++ b/docs/content/snippets/plotpullsandimpacts_param_tab.md @@ -2,6 +2,6 @@ The `PlotPullsAndImpacts` task collects fit results from `MergePullsAndImpacts`
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,65,68,56,28,29,33,27,52,30,31,32,3,4,67" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,65,68,56,28,29,33,27,52,30,31,32,3,4,67"
diff --git a/docs/content/snippets/plotsignificancescan_param_tab.md b/docs/content/snippets/plotsignificancescan_param_tab.md index 79f8dbae51749afd4ccddf6cd0c71cdffdc324ad..aafb94d5ee6d4d7dcd1c23dd566d15b1d15eea64 100644 --- a/docs/content/snippets/plotsignificancescan_param_tab.md +++ b/docs/content/snippets/plotsignificancescan_param_tab.md @@ -2,6 +2,6 @@ The `PlotSignificanceScan` task collects the fit results from `MergeSignificance
---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,53,48,17,18,3,4,64,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,53,48,17,18,74,3,4,64,5,6,7,8,9"
diff --git a/docs/content/snippets/plotupperlimits_param_tab.md b/docs/content/snippets/plotupperlimits_param_tab.md index c00580f2b5222532dfa7c02866dc745f8743d386..88e7fda117cda2337aea1ff6141fb11db4dc321e 100644 --- a/docs/content/snippets/plotupperlimits_param_tab.md +++ b/docs/content/snippets/plotupperlimits_param_tab.md @@ -3,6 +3,6 @@ It provides some handy CLI parameters to manipulate the visualisation.
---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,10,11,3,4,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,10,11,3,4,5,6,7,8,9"
diff --git a/docs/content/snippets/plotupperlimitsatpoint_param_tab.md b/docs/content/snippets/plotupperlimitsatpoint_param_tab.md index 367bab9b016577575c9d5d705715344b40eeea0e..5d04148e36798383815ad16e07339d38973dcd3e 100644 --- a/docs/content/snippets/plotupperlimitsatpoint_param_tab.md +++ b/docs/content/snippets/plotupperlimitsatpoint_param_tab.md @@ -2,6 +2,6 @@ The `PlotUpperLimitsAtPoint` task collects a single fit result from each `UpperL
---8<-- "content/snippets/parameters.md@-2,21,19,16,14,66,12,48,17,18,10,11,3,4,22,23,37,5,6,38" +--8<-- "content/snippets/parameters.md@-2,21,19,16,14,66,12,48,17,18,74,10,11,3,4,22,23,37,5,6,38"
diff --git a/docs/content/snippets/pullsandimpacts_param_tab.md b/docs/content/snippets/pullsandimpacts_param_tab.md index aed97a9de91ce90c06a37dad961d6c539d50ff53..ef203b0883b1030fcb9a093bacd1e6e42175d1c2 100644 --- a/docs/content/snippets/pullsandimpacts_param_tab.md +++ b/docs/content/snippets/pullsandimpacts_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio
---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,56,28,33,13,34,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,56,28,33,13,34,36"
diff --git a/docs/content/snippets/significancescan_param_tab.md b/docs/content/snippets/significancescan_param_tab.md index 8ae51a5699cd6e55238ba1cf90e865f04c5fe340..8dd4c50797152d3ac5bb50fa1d53387815fa60a9 100644 --- a/docs/content/snippets/significancescan_param_tab.md +++ b/docs/content/snippets/significancescan_param_tab.md @@ -2,6 +2,6 @@ The `SignificanceScan` runs the fits for each point in the defined scan paramete
---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,48,17,18,47,53,34,35,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,48,17,18,74,47,53,34,35,13,36"
diff --git a/docs/content/snippets/snapshot_param_tab.md b/docs/content/snippets/snapshot_param_tab.md new file mode 100644 index 0000000000000000000000000000000000000000..9cb13a61556956c4ae5f5371453473bcf8543d31 --- /dev/null +++ b/docs/content/snippets/snapshot_param_tab.md @@ -0,0 +1,8 @@ +The `Snapshot` task runs a single fit for the given POI and parameter values, and saves the workspace with all parameters set to their best fit values following a standard `MultiDimFit`. +It provides some handy cli parameters to manipulate POIs, ranges and other options. + +
+ +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,47,34,35,13,36" + +
diff --git a/docs/content/snippets/upperlimits_param_tab.md b/docs/content/snippets/upperlimits_param_tab.md index d6c78872a6deed87c7073863d81551a9609a74f7..8d6eedfc00fea522439c047473f0b1c9af4e0407 100644 --- a/docs/content/snippets/upperlimits_param_tab.md +++ b/docs/content/snippets/upperlimits_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio
---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,47,34,35,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74,47,34,35,13,36"
diff --git a/docs/content/tasks/snapshot.md b/docs/content/tasks/snapshot.md new file mode 100644 index 0000000000000000000000000000000000000000..5505a0d6c7d6104007ac7d48659000124c5e7b1a --- /dev/null +++ b/docs/content/tasks/snapshot.md @@ -0,0 +1,86 @@ +The `Snapshot` task is essentially performing a `MultiDimFit` and saves a copy of the initial workspace with all parameters set to their best fit values. +This updated workspace can be used in all subsequent `combine` commands just like the normal workspace, with the benefit that parameters are already at sensible values before the fit which can lead to faster and potentially more stable convergence. +See the [combine documentation](http://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part3/runningthetool/#loading-snapshots) for more info on snapshots. + +Almost all tasks within the inference tools are able to use a workspace snapshot instead of the normal workspace by adding ==`--use-snapshot`== to the `law run ...` command to run. +By doing so, the requirement structure is slightly changed in order to first run the snapshot for the selected datacards, HH physics model, parameter options, blinded / unblinded flags, etc. + +As an example, the workflow triggered by `PlotLikelihoodScan` with the additional `--use-snapshot` option will look like this: + +```mermaid + graph LR; + A(PlotLikelihoodScan) --> B(MergeLikelihoodScan); + B --> C([LikelihoodScan]); + C --> D([Snapshot]); + D --> E(CreateWorkspace); + E --> F(CombineDatacards); +``` + +Note the additional `Snapshot` task between `Likelihoodscan` and `CreateWorkspace`. +In any other regard, the `Snapshot` behaves like the `LikelihoodScan` task, i.e., it supports the same options and it is able to submit jobs to HTCondor (as indicated by the round boxes). 
+ +- [Quick example](#quick-example) +- [Dependencies](#dependencies) +- [Parameters](#parameters) +- [Example commands](#example-commands) + + +#### Quick example + +```shell +law run Snapshot \ + --version dev \ + --datacards $DHI_EXAMPLE_CARDS \ + --pois r +``` + +#### Dependencies + +```mermaid + graph LR; + A([Snapshot]) --> B(CreateWorkspace); + B --> C(CombineDatacards); +``` + +Rounded boxes mark [workflows](practices.md#workflows) with the option to run tasks as HTCondor jobs. + + +#### Parameters + +=== "Snapshot" + + --8<-- "content/snippets/snapshot_param_tab.md" + +=== "CreateWorkspace" + + --8<-- "content/snippets/createworkspace_param_tab.md" + +=== "CombineDatacards" + + --8<-- "content/snippets/combinedatacards_param_tab.md" + + +#### Example commands + +**1.** Blinded snapshot on POI `kl` with `kt` set to 1.5. + +```shell hl_lines="4-5" +law run Snapshot \ + --version dev \ + --datacards $DHI_EXAMPLE_CARDS \ + --pois kl \ + --parameter-values kt=1.5 +``` + + +**2.** Same as example command 1, but execute the snapshot task on HTCondor and run unblinded. 
+ +```shell hl_lines="6-7" +law run Snapshot \ + --version dev \ + --datacards $DHI_EXAMPLE_CARDS \ + --pois kl \ + --parameter-values kt=1.5 \ + --workflow htcondor \ + --unblinded +``` diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c264d2f19425c9b66a74757090eefced56fb6a5c..df96bf7799f22e08c2c838c70ac1d093c8bc2eca 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -71,7 +71,7 @@ markdown_extensions: check_paths: true extra_javascript: - - https://unpkg.com/mermaid@8.6/dist/mermaid.min.js + - https://unpkg.com/mermaid@8.12/dist/mermaid.min.js extra_css: - extra/styles.css @@ -94,6 +94,7 @@ nav: - Postfit plots: tasks/postfit.md - Goodness-of-fit tests: tasks/gof.md - EFT limits: tasks/eft.md + - Snapshots: tasks/snapshot.md - Best practices: tasks/practices.md - Datacard manipulation: datacard_manipulation.md - Useful scripts: scripts.md diff --git a/modules/law b/modules/law index 8298b62f07a3ab177e0f95a018d33b32f68453ee..250aab4d27217fa713e899cfcd80256fafcf91ec 160000 --- a/modules/law +++ b/modules/law @@ -1 +1 @@ -Subproject commit 8298b62f07a3ab177e0f95a018d33b32f68453ee +Subproject commit 250aab4d27217fa713e899cfcd80256fafcf91ec diff --git a/setup.sh b/setup.sh index 1c55277a17a87585fd1f05b9c9aed67ba5b29f87..b052de18b6421876e9e0829c0ad5f15496b46576 100644 --- a/setup.sh +++ b/setup.sh @@ -62,7 +62,7 @@ setup() { # CMSSW & combine setup # - local combine_version="3" + local combine_version="4" local flag_file_combine="$DHI_SOFTWARE/.combine_good" if [ "$DHI_COMBINE_STANDALONE" != "True" ]; then @@ -92,7 +92,7 @@ setup() { git fetch --tags && \ git checkout tags/v8.2.0 && \ chmod ug+x test/diffNuisances.py && \ - scram b -j + scram b -j ${DHI_INSTALL_CORES} ) || return "$?" else # standalone @@ -364,7 +364,7 @@ interactive_setup() { # prepare the tmp env file if ! $setup_is_default && ! 
$env_file_exists; then - rm -rf "$env_file_tmp" + rm -f "$env_file_tmp" mkdir -p "$( dirname "$env_file_tmp" )" echo -e "\nStart querying variables for setup '\x1b[0;49;35m$setup_name\x1b[0m'"