From 92448dfd02783d812ee6b177c0da880c54b2496a Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Thu, 26 Aug 2021 19:34:35 +0200 Subject: [PATCH 01/21] Enable snapshots in likelihood scans. --- dhi/tasks/likelihoods.py | 57 +++++++++++++++++++++++++++++++------- dhi/tasks/pulls_impacts.py | 2 +- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index d04564b7..2b185204 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -31,6 +31,11 @@ class LikelihoodBase(POIScanTask): class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): + snapshot_branch = luigi.IntParameter( + default=law.NO_INT, + description="when set, use the fit from this branch task as a snapshot; default: empty", + ) + run_command_in_tmp = True def create_branch_map(self): @@ -50,31 +55,60 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo return linspace + @property + def use_snapshot(self): + return self.snapshot_branch not in (None, law.NO_INT) + def workflow_requires(self): reqs = super(LikelihoodScan, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot and set(self.branch_map) != {self.snapshot_branch}: + reqs["snapshot"] = self.req(self, branches=(self.snapshot_branch,)) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot and self.branch != self.snapshot_branch: + reqs["snapshot"] = self.req(self, branch=self.snapshot_branch) + return reqs def output(self): - return self.local_target("likelihood__" + self.get_output_postfix() + ".root") + parts = [] + if self.use_snapshot: + if self.branch == self.snapshot_branch: + parts.append("withsnapshot") + else: + parts.extend(["fromsnapshot"] + [ + "{}{}".format(*tpl) for tpl in + 
zip(self.scan_parameter_names, self.branch_map[self.snapshot_branch]) + ]) + + name = self.join_postfix(["likelihood", self.get_output_postfix(), parts]) + ".root" + return self.local_target(name) - @property - def blinded_args(self): + def build_command(self): + # get the workspace to use and define snapshot args + use_snapshot = self.use_snapshot and self.branch != self.snapshot_branch + if use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = "--snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "--saveWorkspace" if self.use_snapshot else "" + + # args for blinding / unblinding if self.unblinded: - return "--seed {self.branch}".format(self=self) + blinded_args = "--seed {self.branch}".format(self=self) else: - return "--seed {self.branch} --toys {self.toys}".format(self=self) + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) - def build_command(self): + # build the command return ( "combine -M MultiDimFit {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --algo grid" " --redefineSignalPOIs {self.joined_pois}" " --gridPoints {self.joined_scan_points}" @@ -85,6 +119,7 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " --saveNLL" " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" " {self.combine_optimization_args}" @@ -92,8 +127,10 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo "mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, + blinded_args=blinded_args, + snapshot_args=snapshot_args, ) diff --git a/dhi/tasks/pulls_impacts.py 
b/dhi/tasks/pulls_impacts.py index e61555ff..587de2d6 100644 --- a/dhi/tasks/pulls_impacts.py +++ b/dhi/tasks/pulls_impacts.py @@ -101,7 +101,7 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow reqs = super(PullsAndImpacts, self).workflow_requires() reqs["workspace"] = CreateWorkspace.req(self) if self.use_snapshot and set(self.branch_map) != {0}: - reqs["snapshot"] = self.req(self, branches=[0]) + reqs["snapshot"] = self.req(self, branches=(0,)) return reqs def requires(self): -- GitLab From 815fd877fedfa4881bca2088c1508c4f66f37f0e Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 27 Aug 2021 09:45:50 +0200 Subject: [PATCH 02/21] Fix snapshot usage in likelihood scan. --- dhi/plots/likelihoods.py | 4 +- dhi/tasks/combine.py | 8 --- dhi/tasks/likelihoods.py | 117 ++++++++++++++++++++++++--------------- 3 files changed, 75 insertions(+), 54 deletions(-) diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index 0952705e..dea39234 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -1141,8 +1141,8 @@ def evaluate_likelihood_scan_1d(poi_values, dnll2_values, poi_min=None): # compare and optionally issue a warning (threshold to be optimized) if abs(poi_min - poi_min_new) >= 0.03: warn( - "WARNING: external POI minimum (from combine) {:.4f} differs from the " - "recomputed value (from scipy.interpolate and scipy.minimize) {:.4f}".format( + "WARNING: external POI minimum {:.4f} (from combine) differs from the " + "recomputed value {:.4f} (from scipy.interpolate and scipy.minimize)".format( poi_min, poi_min_new) ) else: diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index 2a6306a6..338ad86b 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -1265,14 +1265,6 @@ class POIScanTask(POITask, ParameterScanTask): return self.join_postfix(parts) if join else parts - def _joined_parameter_values_pois(self): - pois = super(POIScanTask, 
self)._joined_parameter_values_pois() - - # skip scan parameters - pois = [p for p in pois if p not in self.scan_parameter_names] - - return pois - class POIPlotTask(PlotTask, POITask): diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index 2b185204..887f5e0d 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -5,9 +5,11 @@ Tasks related to likelihood scans. """ import copy +from operator import mul import law import luigi +import six from dhi.tasks.base import HTCondorWorkflow, view_output_plots from dhi.tasks.combine import ( @@ -27,6 +29,7 @@ class LikelihoodBase(POIScanTask): pois._default = ("kl",) force_scan_parameters_equal_pois = True + allow_parameter_values_in_pois = True class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): @@ -35,6 +38,11 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo default=law.NO_INT, description="when set, use the fit from this branch task as a snapshot; default: empty", ) + save_snapshot = luigi.BoolParameter( + default=False, + description="when set, saves the best fit snapshot workspace in the output file; " + "default: False", + ) run_command_in_tmp = True @@ -62,39 +70,39 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo def workflow_requires(self): reqs = super(LikelihoodScan, self).workflow_requires() reqs["workspace"] = CreateWorkspace.req(self) - if self.use_snapshot and set(self.branch_map) != {self.snapshot_branch}: - reqs["snapshot"] = self.req(self, branches=(self.snapshot_branch,)) + if self.use_snapshot: + reqs["snapshot"] = self.req(self, branches=(self.snapshot_branch,), save_snapshot=True, + snapshot_branch=law.NO_INT) return reqs def requires(self): reqs = {"workspace": CreateWorkspace.req(self)} - if self.use_snapshot and self.branch != self.snapshot_branch: - reqs["snapshot"] = self.req(self, branch=self.snapshot_branch) + if self.use_snapshot: + reqs["snapshot"] = 
self.req(self, branch=self.snapshot_branch, save_snapshot=True, + snapshot_branch=law.NO_INT) return reqs def output(self): parts = [] + if self.save_snapshot: + parts.append("withsnapshot") if self.use_snapshot: - if self.branch == self.snapshot_branch: - parts.append("withsnapshot") - else: - parts.extend(["fromsnapshot"] + [ - "{}{}".format(*tpl) for tpl in - zip(self.scan_parameter_names, self.branch_map[self.snapshot_branch]) - ]) + parts.extend(["fromsnapshot"] + [ + "{}{}".format(*tpl) for tpl in + zip(self.scan_parameter_names, self.branch_map[self.snapshot_branch]) + ]) name = self.join_postfix(["likelihood", self.get_output_postfix(), parts]) + ".root" return self.local_target(name) def build_command(self): # get the workspace to use and define snapshot args - use_snapshot = self.use_snapshot and self.branch != self.snapshot_branch - if use_snapshot: + snapshot_args = "--saveWorkspace" if self.save_snapshot else "" + if self.use_snapshot: workspace = self.input()["snapshot"].path - snapshot_args = "--snapshotName MultiDimFit" + snapshot_args += " --snapshotName MultiDimFit" else: workspace = self.input()["workspace"].path - snapshot_args = "--saveWorkspace" if self.use_snapshot else "" # args for blinding / unblinding if self.unblinded: @@ -103,7 +111,7 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) # build the command - return ( + cmd = ( "combine -M MultiDimFit {workspace}" " {self.custom_args}" " --verbose 1" @@ -133,6 +141,8 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo snapshot_args=snapshot_args, ) + return cmd + class MergeLikelihoodScan(LikelihoodBase): @@ -392,14 +402,14 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): paper=self.paper, ) - def load_scan_data(self, inputs, merge_scans=True, recompute_dnll2=True): + def load_scan_data(self, inputs, recompute_dnll2=True, merge_scans=True): 
return self._load_scan_data(inputs, self.scan_parameter_names, - self.get_scan_parameter_combinations(), merge_scans=merge_scans, - recompute_dnll2=recompute_dnll2) + self.get_scan_parameter_combinations(), recompute_dnll2=recompute_dnll2, + merge_scans=merge_scans) @classmethod def _load_scan_data(cls, inputs, scan_parameter_names, scan_parameter_combinations, - merge_scans=True, recompute_dnll2=True): + recompute_dnll2=True, merge_scans=True): import numpy as np # load values of each input @@ -413,18 +423,27 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): for i in range(len(scan_parameter_names)) ]) + # nll0 values must be identical per input (otherwise there might be an issue with the model) + for v in values: + nll_unique = np.unique(v["nll0"]) + nll_unique = nll_unique[~np.isnan(nll_unique)] + if len(nll_unique) != 1: + raise Exception("found {} different nll0 values in scan data which indicates in " + "issue with the model: {}".format(len(nll_unique), nll_unique)) + + # recompute dnll2 from the minimum nll and fit_nll + if recompute_dnll2: + # use the overall minimal nll as a common reference value when merging + min_nll = min(np.nanmin(v["nll"]) for v in values) + for v in values: + _min_nll = min_nll if merge_scans else np.nanmin(v["nll"]) + v["dnll2"] = 2 * (v["fit_nll"] - _min_nll) + # concatenate values and safely remove duplicates when configured if merge_scans: test_fn = lambda kept, removed: kept < 1e-7 or abs((kept - removed) / kept) < 0.001 values = unique_recarray(values, cols=scan_parameter_names, - test_metric=("fit_nll", test_fn)) - - # recompute dnll2 from the minimum nll and fit_nll - if recompute_dnll2: - _values = [values] if merge_scans else values - min_nll = min(np.nanmin(v["nll"]) for v in _values) - for v in _values: - v["dnll2"] = 2 * (v["fit_nll"] - min_nll) + test_metric=("dnll2", test_fn)) # pick the most appropriate poi mins poi_mins = cls._select_poi_mins(all_poi_mins, scan_parameter_combinations) @@ -433,22 +452,32 @@ 
class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): @classmethod def _select_poi_mins(cls, poi_mins, scan_parameter_combinations): - # pick the poi mins for the scan range that has the lowest step size around the mins - # the combined step size of multiple dims is simply defined by their sum - min_step_size = 1e5 - best_poi_mins = poi_mins[0] - for _poi_mins, scan_parameters in zip(poi_mins, scan_parameter_combinations): - if None in _poi_mins: - continue - # each min is required to be in the corresponding scan range - if not all((a <= v <= b) for v, (_, a, b, _) in zip(_poi_mins, scan_parameters)): - continue - # compute the merged step size - step_size = sum((b - a) / (n - 1.) for (_, a, b, n) in scan_parameters) - # store - if step_size < min_step_size: - min_step_size = step_size - best_poi_mins = _poi_mins + # pick the poi min corrsponding to the largest spanned region, i.e., range / area / volume + regions = [ + (i, six.moves.reduce(mul, [stop - start for _, start, stop, _ in scan_range])) + for i, scan_range in enumerate(scan_parameter_combinations) + ] + best_i = sorted(regions, key=lambda pair: -pair[1])[0][0] + best_poi_mins = poi_mins[best_i] + + # old algorithm: + # # pick the poi mins for the scan range that has the lowest step size around the mins + # # the combined step size of multiple dims is simply defined by their sum + # min_step_size = 1e5 + # best_poi_mins = poi_mins[0] + # for _poi_mins, scan_parameters in zip(poi_mins, scan_parameter_combinations): + # if None in _poi_mins: + # continue + # # each min is required to be in the corresponding scan range + # if not all((a <= v <= b) for v, (_, a, b, _) in zip(_poi_mins, scan_parameters)): + # continue + # # compute the merged step size + # step_size = sum((b - a) / (n - 1.) 
for (_, a, b, n) in scan_parameters) + # # store + # if step_size < min_step_size: + # min_step_size = step_size + # best_poi_mins = _poi_mins + return best_poi_mins -- GitLab From 88a1ddd9da66dec48d5a2312f5ae4f6afb1985ea Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 27 Aug 2021 12:57:46 +0200 Subject: [PATCH 03/21] Use new --setParameterRangesForGrid option in likelihood scans. --- dhi/plots/likelihoods.py | 3 ++- dhi/tasks/likelihoods.py | 19 +++---------------- setup.sh | 15 +++++++++------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index dea39234..ac603714 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -1121,7 +1121,8 @@ def evaluate_likelihood_scan_1d(poi_values, dnll2_values, poi_min=None): # first, obtain an interpolation function # interp = scipy.interpolate.interp1d(poi_values, dnll2_values, kind="linear") try: - interp = scipy.interpolate.interp1d(poi_values, dnll2_values, kind="cubic") + interp = scipy.interpolate.interp1d(poi_values, dnll2_values, kind="cubic", + fill_value="extrapolate") except: return None diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index 887f5e0d..31fad193 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -47,21 +47,7 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo run_command_in_tmp = True def create_branch_map(self): - linspace = self.get_scan_linspace() - - # when blinded and the expected best fit values of r and k POIs (== 1) are not contained - # in the grid points, log an error as this leads to a scenario where the global minimum nll - # is not computed and the deltas to all other nll values become arbitrary and incomparable - if self.blinded: - for i, poi in enumerate(self.pois): - values = [tpl[i] for tpl in linspace] - if poi in self.all_pois and 1 not in values: - scan = "start: {}, stop: {}, points: 
{}".format(*self.scan_parameters[i][1:]) - self.logger.error("the expected best fit value of 1 is not contained in the " - "values to scan for POI {} ({}), leading to dnll values being computed " - "relative to an arbitrary minimum".format(poi, scan)) - - return linspace + return self.get_scan_linspace() @property def use_snapshot(self): @@ -119,11 +105,12 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo " {blinded_args}" " --algo grid" " --redefineSignalPOIs {self.joined_pois}" + " --setParameterRangesForGrid {self.joined_scan_ranges}" " --gridPoints {self.joined_scan_points}" " --firstPoint {self.branch}" " --lastPoint {self.branch}" " --alignEdges 1" - " --setParameterRanges {self.joined_scan_ranges}:{self.joined_parameter_ranges}" + " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" diff --git a/setup.sh b/setup.sh index 1c55277a..8dea7e01 100644 --- a/setup.sh +++ b/setup.sh @@ -62,7 +62,7 @@ setup() { # CMSSW & combine setup # - local combine_version="3" + local combine_version="4" local flag_file_combine="$DHI_SOFTWARE/.combine_good" if [ "$DHI_COMBINE_STANDALONE" != "True" ]; then @@ -77,6 +77,9 @@ setup() { [ "$DHI_REINSTALL_COMBINE" = "1" ] && rm -f "$flag_file_combine" if [ ! 
-f "$flag_file_combine" ]; then mkdir -p "$DHI_SOFTWARE" + # the combine setup below uses a custom branch which adds only a single commit on top of + # the recommended v8.2.0 tag; once https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/pull/686 + # is merged, we can transition back to the central repo if [ "$DHI_COMBINE_STANDALONE" != "True" ]; then # CMSSW based ( @@ -87,12 +90,12 @@ setup() { cd "$CMSSW_VERSION/src" && \ eval "$( scramv1 runtime -sh )" && \ scram b && \ - git clone --depth 1 https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ + git clone --depth 1 --branch parameter_ranges_for_grid https://github.com/riga/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ cd HiggsAnalysis/CombinedLimit && \ git fetch --tags && \ - git checkout tags/v8.2.0 && \ + git checkout parameter_ranges_for_grid && \ chmod ug+x test/diffNuisances.py && \ - scram b -j + scram b -j ${DHI_INSTALL_CORES} ) || return "$?" else # standalone @@ -100,10 +103,10 @@ setup() { echo "installing standalone combine at $DHI_SOFTWARE/HiggsAnalysis/CombinedLimit" cd "$DHI_SOFTWARE" rm -rf HiggsAnalysis/CombinedLimit - git clone --depth 1 https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ + git clone --depth 1 --branch parameter_ranges_for_grid https://github.com/riga/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ cd HiggsAnalysis/CombinedLimit && \ git fetch --tags && \ - git checkout tags/v8.2.0 && \ + git checkout parameter_ranges_for_grid && \ chmod ug+x test/diffNuisances.py && \ source env_standalone.sh "" && \ make -j ${DHI_INSTALL_CORES} && \ -- GitLab From e41dde980e6e5df0fcdf31af5a1d5a4573a5f56b Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 27 Aug 2021 13:50:36 +0200 Subject: [PATCH 04/21] Improve plot_datacard_shapes.py script. 
--- dhi/scripts/plot_datacard_shapes.py | 40 +++++++++++++++------------ docs/content/datacard_manipulation.md | 7 +++-- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/dhi/scripts/plot_datacard_shapes.py b/dhi/scripts/plot_datacard_shapes.py index d1e26193..ef515833 100755 --- a/dhi/scripts/plot_datacard_shapes.py +++ b/dhi/scripts/plot_datacard_shapes.py @@ -43,9 +43,9 @@ from dhi.config import colors logger = create_console_logger(os.path.splitext(os.path.basename(__file__))[0]) -def plot_datacard_shapes(datacard, rules, stack=False, directory=".", - nom_format="{bin}__{process}.pdf", syst_format="{bin}__{process}__{syst}.pdf", mass="125", - **plot_kwargs): +def plot_datacard_shapes(datacard, rules, stack=False, directory=".", file_type="pdf", + nom_format="{bin}__{process}.{file_type}", + syst_format="{bin}__{process}__{syst}.{file_type}", mass="125", **plot_kwargs): """ Reads a *datacard* and plots its histogram shapes according to certain *rules*. A rule should be a tuple consisting of a datacard bin, a datacard process and an optional name of a systematic @@ -280,23 +280,24 @@ def plot_datacard_shapes(datacard, rules, stack=False, directory=".", continue # draw the shape - create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, nom_format, - syst_format, **plot_kwargs) + create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, file_type, + nom_format, syst_format, **plot_kwargs) @use_style("dhi_default") -def create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, nom_format, syst_format, - binning="original", x_title="Datacard shape", y_min=None, y_max=None, y_min2=None, - y_max2=None, y_log=False, campaign_label=None): +def create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, file_type, nom_format, + syst_format, binning="original", x_title="Datacard shape", y_min=None, y_max=None, + y_min2=None, y_max2=None, y_log=False, campaign_label=None): import plotlib.root as r ROOT = 
import_ROOT() # check if systematic shifts are to be plotted, determine the plot path plot_syst = param is not None and any(d is not None for _, d, _ in proc_shapes.values()) if plot_syst: - path = syst_format.format(bin=bin_name, process=proc_label, syst=param["name"]) + path = syst_format.format(bin=bin_name, process=proc_label, syst=param["name"], + file_type=file_type) else: - path = nom_format.format(bin=bin_name, process=proc_label) + path = nom_format.format(bin=bin_name, process=proc_label, file_type=file_type) path = path.replace("*", "X").replace("?", "Y").replace("!", "N") path = os.path.join(directory, path) logger.debug("going to create plot at {} for shapes in bin {} and process {}, stacking {} " @@ -384,6 +385,7 @@ def create_shape_plot(bin_name, proc_label, proc_shapes, param, directory, nom_f r.setup_pad(pad2, props={"TopMargin": 0.7, "Gridy": 1}) else: canvas, (pad1,) = r.routines.create_canvas() + r.setup_pad(pad1, props={"Logy": y_log}) pad1.cd() draw_objs1 = [] draw_objs2 = [] @@ -566,10 +568,13 @@ if __name__ == "__main__": "plots per process machted by a rule, stack distributions and create a single plot") parser.add_argument("--directory", "-d", default=".", help="directory in which produced plots " "are saved; defaults to the current directory") - parser.add_argument("--nom-format", default="{bin}__{process}.pdf", help="format for created " - "files when creating only nominal shapes; default: {bin}__{process}.pdf") - parser.add_argument("--syst-format", default="{bin}__{process}__{syst}.pdf", help="format for " - "created files when creating systematic shapes; default: {bin}__{process}__{syst}.pdf") + parser.add_argument("--file-type", "-f", default="pdf", help="the file type to produce; " + "default: pdf") + parser.add_argument("--nom-format", default="{bin}__{process}.{file_type}", help="format for " + "created files when creating only nominal shapes; default: {bin}__{process}.{file_type}") + parser.add_argument("--syst-format", 
default="{bin}__{process}__{syst}.{file_type}", + help="format for created files when creating systematic shapes; default: " + "{bin}__{process}__{syst}.{file_type}") parser.add_argument("--mass", "-m", default="125", help="mass hypothesis; default: 125") parser.add_argument("--binning", "-b", default="original", choices=["original", "numbers", "numbers_width"], help="the binning strategy; 'original': use original bin edges; " @@ -594,6 +599,7 @@ if __name__ == "__main__": # run the renaming with patch_object(logger, "name", args.log_name): plot_datacard_shapes(args.input, args.rules, stack=args.stack, directory=args.directory, - nom_format=args.nom_format, syst_format=args.syst_format, mass=args.mass, - binning=args.binning, x_title=args.x_title, y_min=args.y_min, y_max=args.y_max, - y_min2=args.y_min2, y_max2=args.y_max2, y_log=args.y_log, campaign_label=args.campaign) + file_type=args.file_type, nom_format=args.nom_format, syst_format=args.syst_format, + mass=args.mass, binning=args.binning, x_title=args.x_title, y_min=args.y_min, + y_max=args.y_max, y_min2=args.y_min2, y_max2=args.y_max2, y_log=args.y_log, + campaign_label=args.campaign) diff --git a/docs/content/datacard_manipulation.md b/docs/content/datacard_manipulation.md index b5bf76ac..69f60ee1 100644 --- a/docs/content/datacard_manipulation.md +++ b/docs/content/datacard_manipulation.md @@ -791,6 +791,7 @@ optional arguments: > plot_datacard_shapes.py --help usage: plot_datacard_shapes.py [-h] [--stack] [--directory DIRECTORY] + [--file-type FILE_TYPE] [--nom-format NOM_FORMAT] [--syst-format SYST_FORMAT] [--mass MASS] [--binning {original,numbers,numbers_width}] @@ -848,12 +849,14 @@ optional arguments: --directory DIRECTORY, -d DIRECTORY directory in which produced plots are saved; defaults to the current directory + --file-type FILE_TYPE, -f FILE_TYPE + the file type to produce; default: pdf --nom-format NOM_FORMAT format for created files when creating only nominal - shapes; default: 
{bin}__{process}.pdf + shapes; default: {bin}__{process}.{file_type} --syst-format SYST_FORMAT format for created files when creating systematic - shapes; default: {bin}__{process}__{syst}.pdf + shapes; default: {bin}__{process}__{syst}.{file_type} --mass MASS, -m MASS mass hypothesis; default: 125 --binning {original,numbers,numbers_width}, -b {original,numbers,numbers_width} the binning strategy; 'original': use original bin -- GitLab From bc22136ed3955860914d340f6d22400bdbe02b61 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 27 Aug 2021 13:50:56 +0200 Subject: [PATCH 05/21] Make the default model configurable via env. --- dhi/tasks/combine.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index 338ad86b..d9261d4b 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -25,6 +25,10 @@ from dhi.util import linspace, try_int, real_path, expand_path from dhi.datacard_tools import bundle_datacard +DEFAULT_HH_MODULE = os.getenv("DHI_DEFAULT_HH_MODULE", "HHModelPinv") +DEFAULT_HH_MODEL = os.getenv("DHI_DEFAULT_HH_MODEL", "model_default") + + def require_hh_model(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): @@ -47,11 +51,12 @@ class HHModelTask(AnalysisTask): } hh_model = luigi.Parameter( - default="HHModelPinv.model_default", + default="{}.{}".format(DEFAULT_HH_MODULE, DEFAULT_HH_MODEL), description="the location of the HH model to use with optional configuration options in " - "the format module.model_name[@opt1[@opt2...]]; when no module name is given, the default " - "one 'dhi.models.HHModelPinv' is assumed; valid options are {}; default: " - "HHModelPinv.model_default".format(",".join(valid_hh_model_options)), + "the format [module.]model_name[@opt1[@opt2...]]; when no module is given, the default " + "'{0}' is assumed; valid options are {2}; default: {0}.{1}".format( + DEFAULT_HH_MODULE, DEFAULT_HH_MODEL, 
",".join(valid_hh_model_options), + ), ) allow_empty_hh_model = False @@ -74,14 +79,13 @@ class HHModelTask(AnalysisTask): # the format used to be "module:model_name" before so adjust it to support legacy commands hh_model = hh_model.replace(":", ".") - # when there is no "." in the string, assume it to be the name of a model in the default - # model file "HHModelPinv" + # when there is no "." in the string, assume it to be the default module if "." not in hh_model: - hh_model = "HHModelPinv.{}".format(hh_model) + hh_model = "{}.{}".format(DEFAULT_HH_MODULE, hh_model) # split into module, model name and options front, options = hh_model.split("@", 1) if "@" in hh_model else (hh_model, "") - module_id, model_name = front.rsplit(".", 1) if "." in front else ("HHModelPinv", front) + module_id, model_name = front.rsplit(".", 1) if "." in front else ("DEFAULT_HH_MODULE", front) options = options.split("@") if options else [] # check if options are valid and split them into key value pairs -- GitLab From 4e34cc3ea268cc279f6fa8b9970eff0d36d8c5cd Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 27 Aug 2021 16:30:34 +0200 Subject: [PATCH 06/21] Make the default model configurable via env. --- dhi/tasks/combine.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index d9261d4b..7e05eb1b 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -25,10 +25,6 @@ from dhi.util import linspace, try_int, real_path, expand_path from dhi.datacard_tools import bundle_datacard -DEFAULT_HH_MODULE = os.getenv("DHI_DEFAULT_HH_MODULE", "HHModelPinv") -DEFAULT_HH_MODEL = os.getenv("DHI_DEFAULT_HH_MODEL", "model_default") - - def require_hh_model(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): @@ -45,6 +41,9 @@ class HHModelTask(AnalysisTask): provides a few convenience functions for working with it. 
""" + DEFAULT_HH_MODULE = "HHModelPinv" + DEFAULT_HH_MODEL = "model_default" + valid_hh_model_options = { "noNNLOscaling", "noBRscaling", "noHscaling", "noklDependentUnc", "doProfilekl", "doProfilekt", "doProfileCV", "doProfileC2V", "doProfileC2", @@ -81,11 +80,11 @@ class HHModelTask(AnalysisTask): # when there is no "." in the string, assume it to be the default module if "." not in hh_model: - hh_model = "{}.{}".format(DEFAULT_HH_MODULE, hh_model) + hh_model = "{}.{}".format(cls.DEFAULT_HH_MODULE, hh_model) # split into module, model name and options front, options = hh_model.split("@", 1) if "@" in hh_model else (hh_model, "") - module_id, model_name = front.rsplit(".", 1) if "." in front else ("DEFAULT_HH_MODULE", front) + module_id, model_name = front.rsplit(".", 1) if "." in front else (cls.DEFAULT_HH_MODULE, front) options = options.split("@") if options else [] # check if options are valid and split them into key value pairs -- GitLab From 6f298457181a08cd8558aa7bb4bcb7ad1e6e8189 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Mon, 30 Aug 2021 19:37:50 +0200 Subject: [PATCH 07/21] Minor fix in likelihood plot. 
--- dhi/plots/likelihoods.py | 14 +++++++++++--- dhi/tasks/combine.py | 8 ++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index ac603714..67df5a07 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -252,7 +252,11 @@ def plot_likelihood_scans_1d( # default name d.setdefault("name", str(i + 1)) # keep only valid points - values = {k: np.array(v, dtype=np.float32) for k, v in values.items()} + values = { + k: np.array(v, dtype=np.float32) + for k, v in values.items() + if k in [poi1, poi2, "dnll2"] + } # preprocess values (nan detection, negative shift) values["dnll2"], values[poi] = _preprocess_values(values["dnll2"], (poi, values[poi]), shift_negative_values=shift_negative_values, origin="entry '{}'".format(d["name"]), @@ -671,7 +675,11 @@ def plot_likelihood_scans_2d( assert len(d["poi_mins"]) == 2 # default name d.setdefault("name", str(i + 1)) - values = {k: np.array(v, dtype=np.float32) for k, v in values.items()} + values = { + k: np.array(v, dtype=np.float32) + for k, v in values.items() + if k in [poi1, poi2, "dnll2"] + } # preprocess values (nan detection, negative shift) values["dnll2"], values[poi1], values[poi2] = _preprocess_values(values["dnll2"], (poi1, values[poi1]), (poi2, values[poi2]), shift_negative_values=shift_negative_values, @@ -1276,7 +1284,7 @@ def evaluate_likelihood_scan_2d( raise Exception("could not find minimum of nll2 interpolation: {}".format(res.message)) else: poi1_min_new = res.x[0] - poi2_min_new = res.x[0] + poi2_min_new = res.x[1] print("done, found {:.4f}, {:.4f}".format(poi1_min_new, poi2_min_new)) if xcheck: # compare and optionally issue a warning (threshold to be optimized) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index 7e05eb1b..fe19c8c6 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -1074,8 +1074,7 @@ class POITask(DatacardTask, ParameterValuesTask): # store available pois on task level and 
potentially update them according to the model if self.hh_model_empty: - self.r_pois = tuple(self.__class__.r_pois) - self.k_pois = tuple(self.__class__.k_pois) + self.r_pois, self.k_pois = self.get_empty_hh_model_pois() else: model = self.load_hh_model()[1] self.r_pois = tuple(model.r_pois) @@ -1106,6 +1105,11 @@ class POITask(DatacardTask, ParameterValuesTask): raise Exception("{!r}: parameter values are not allowed to be in POIs, but " "found '{}'".format(self, p)) + def get_empty_hh_model_pois(self): + # hook that can be implemented to configure the r (and possibly k) POIs to be used + # when not hh model is configured + return tuple(self.__class__.r_pois), tuple(self.__class__.k_pois) + def store_parts(self): parts = super(POITask, self).store_parts() parts["poi"] = "poi_{}".format("_".join(self.pois)) -- GitLab From 521722c7b40d027a003c30594c742887737ff9d6 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Mon, 30 Aug 2021 19:49:22 +0200 Subject: [PATCH 08/21] Minor fix in likelihood plot. 
--- dhi/plots/likelihoods.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index 67df5a07..8a13fb3d 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -251,7 +251,7 @@ def plot_likelihood_scans_1d( d.setdefault("poi_min", None) # default name d.setdefault("name", str(i + 1)) - # keep only valid points + # drop all fields except for required ones values = { k: np.array(v, dtype=np.float32) for k, v in values.items() @@ -441,6 +441,8 @@ def plot_likelihood_scan_2d( assert poi1 in _values assert poi2 in _values assert "dnll2" in _values + # drop all fields except for required ones + _values = {k: v for k, v in _values.items() if k in [poi1, poi2, "dnll2"]} # preprocess values (nan detection, negative shift) _values["dnll2"], _values[poi1], _values[poi2] = _preprocess_values(_values["dnll2"], (poi1, _values[poi1]), (poi2, _values[poi2]), remove_nans=interpolate_nans, @@ -675,6 +677,7 @@ def plot_likelihood_scans_2d( assert len(d["poi_mins"]) == 2 # default name d.setdefault("name", str(i + 1)) + # drop all fields except for required ones and convert to arrays values = { k: np.array(v, dtype=np.float32) for k, v in values.items() -- GitLab From 4ae1f73d45a683e6afe1840e6941a3caa8c4e9df Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Mon, 30 Aug 2021 21:03:55 +0200 Subject: [PATCH 09/21] Fix poi selection for scan tasks. 
--- dhi/tasks/combine.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index fe19c8c6..80d559ad 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -1250,6 +1250,14 @@ class POIScanTask(POITask, ParameterScanTask): raise Exception("scan parameters are not allowed to be in parameter ranges, " "but found {}".format(p)) + def _joined_parameter_values_pois(self): + pois = super(POIScanTask, self)._joined_parameter_values_pois() + + # skip scan parameters + pois = [p for p in pois if p not in self.scan_parameter_names] + + return pois + def get_output_postfix_pois(self): use_all_pois = self.allow_parameter_values_in_pois or self.force_scan_parameters_equal_pois return self.all_pois if use_all_pois else self.other_pois -- GitLab From 7824152247d1634841b0f69030d4be4ec6028337 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Mon, 30 Aug 2021 22:12:34 +0200 Subject: [PATCH 10/21] Add global snapshot task that is used across combine tasks. 
--- dhi/tasks/__init__.py | 1 + dhi/tasks/likelihoods.py | 47 +++++++---------- dhi/tasks/limits.py | 48 ++++++++++++++--- dhi/tasks/postfit.py | 53 ++++++++++++------- dhi/tasks/pulls_impacts.py | 74 ++++++++++---------------- dhi/tasks/significances.py | 61 +++++++++++++++++----- dhi/tasks/snapshot.py | 104 +++++++++++++++++++++++++++++++++++++ 7 files changed, 274 insertions(+), 114 deletions(-) create mode 100644 dhi/tasks/snapshot.py diff --git a/dhi/tasks/__init__.py b/dhi/tasks/__init__.py index 711c61ad..20d1b25d 100644 --- a/dhi/tasks/__init__.py +++ b/dhi/tasks/__init__.py @@ -4,6 +4,7 @@ # import all task modules import dhi.tasks.base import dhi.tasks.combine +import dhi.tasks.snapshot import dhi.tasks.limits import dhi.tasks.likelihoods import dhi.tasks.significances diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index 31fad193..c4cb1192 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -20,10 +20,11 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.util import unique_recarray, extend_recarray, convert_dnll2 -class LikelihoodBase(POIScanTask): +class LikelihoodBase(POIScanTask, SnapshotUser): pois = copy.copy(POIScanTask.pois) pois._default = ("kl",) @@ -34,61 +35,40 @@ class LikelihoodBase(POIScanTask): class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): - snapshot_branch = luigi.IntParameter( - default=law.NO_INT, - description="when set, use the fit from this branch task as a snapshot; default: empty", - ) - save_snapshot = luigi.BoolParameter( - default=False, - description="when set, saves the best fit snapshot workspace in the output file; " - "default: False", - ) - run_command_in_tmp = True def create_branch_map(self): return self.get_scan_linspace() - @property - def use_snapshot(self): - return self.snapshot_branch not in (None, law.NO_INT) - def workflow_requires(self): reqs = 
super(LikelihoodScan, self).workflow_requires() reqs["workspace"] = CreateWorkspace.req(self) if self.use_snapshot: - reqs["snapshot"] = self.req(self, branches=(self.snapshot_branch,), save_snapshot=True, - snapshot_branch=law.NO_INT) + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): reqs = {"workspace": CreateWorkspace.req(self)} if self.use_snapshot: - reqs["snapshot"] = self.req(self, branch=self.snapshot_branch, save_snapshot=True, - snapshot_branch=law.NO_INT) + reqs["snapshot"] = Snapshot.req(self, branch=0) return reqs def output(self): parts = [] - if self.save_snapshot: - parts.append("withsnapshot") if self.use_snapshot: - parts.extend(["fromsnapshot"] + [ - "{}{}".format(*tpl) for tpl in - zip(self.scan_parameter_names, self.branch_map[self.snapshot_branch]) - ]) + parts.append("fromsnapshot") name = self.join_postfix(["likelihood", self.get_output_postfix(), parts]) + ".root" return self.local_target(name) def build_command(self): # get the workspace to use and define snapshot args - snapshot_args = "--saveWorkspace" if self.save_snapshot else "" if self.use_snapshot: workspace = self.input()["snapshot"].path - snapshot_args += " --snapshotName MultiDimFit" + snapshot_args = " --snapshotName MultiDimFit" else: workspace = self.input()["workspace"].path + snapshot_args = "" # args for blinding / unblinding if self.unblinded: @@ -137,7 +117,12 @@ class MergeLikelihoodScan(LikelihoodBase): return LikelihoodScan.req(self) def output(self): - return self.local_target("limits__" + self.get_output_postfix() + ".npz") + parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") + + name = self.join_postfix(["likelihoods", self.get_output_postfix(), parts]) + ".npz" + return self.local_target(name) @law.decorator.log @law.decorator.safe_output @@ -260,6 +245,8 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if 
self.n_pois == 1 and self.y_log: parts.append("log") @@ -494,6 +481,8 @@ class PlotMultipleLikelihoodScans(PlotLikelihoodScan, MultiDatacardTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.n_pois == 1 and self.y_log: parts.append("log") @@ -607,6 +596,8 @@ class PlotMultipleLikelihoodScansByModel(PlotLikelihoodScan, MultiHHModelTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.n_pois == 1 and self.y_log: parts.append("log") diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index 86db70a9..b8fd6bb7 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -18,11 +18,12 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.util import unique_recarray, real_path from dhi.config import br_hh -class UpperLimitsBase(POIScanTask): +class UpperLimitsBase(POIScanTask, SnapshotUser): force_scan_parameters_unequal_pois = True @@ -36,16 +37,34 @@ class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCond def workflow_requires(self): reqs = super(UpperLimits, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): - return self.local_target("limit__" + self.get_output_postfix() + ".root") + parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") + + name = self.join_postfix(["limit", self.get_output_postfix(), parts]) + ".root" + return self.local_target(name) def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = 
self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" + # arguments for un/blinding if self.unblinded: blinded_args = "--seed {self.branch}".format(self=self) @@ -69,14 +88,16 @@ class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCond " --setParameters {self.joined_scan_values},{self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.AsymptoticLimits.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, blinded_args=blinded_args, + snapshot_args=snapshot_args, ) return cmd @@ -111,7 +132,12 @@ class MergeUpperLimits(UpperLimitsBase): return UpperLimits.req(self) def output(self): - return self.local_target("limits__" + self.get_output_postfix() + ".npz") + parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") + + name = self.join_postfix(["limits", self.get_output_postfix(), parts]) + ".npz" + return self.local_target(name) @law.decorator.log @law.decorator.safe_output @@ -205,6 +231,8 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -348,6 +376,8 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -464,6 +494,8 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + 
parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -695,6 +727,8 @@ class PlotUpperLimitsAtPoint(POIPlotTask, MultiDatacardTask, BoxPlotTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -852,6 +886,8 @@ class PlotUpperLimits2D(UpperLimitsBase, POIPlotTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.z_log: parts.append("log") diff --git a/dhi/tasks/postfit.py b/dhi/tasks/postfit.py index 00e9d422..a25941f4 100644 --- a/dhi/tasks/postfit.py +++ b/dhi/tasks/postfit.py @@ -12,6 +12,7 @@ import luigi from dhi.tasks.base import HTCondorWorkflow, view_output_plots from dhi.tasks.combine import CombineCommandTask, POITask, POIPlotTask, CreateWorkspace +from dhi.tasks.snapshot import Snapshot, SnapshotUser class SAVEFLAGS(str, enum.Enum): @@ -29,7 +30,7 @@ class SAVEFLAGS(str, enum.Enum): return list(map(lambda x: x.value, cls)) -class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): +class FitDiagnostics(POITask, CombineCommandTask, SnapshotUser, law.LocalWorkflow, HTCondorWorkflow): pois = law.CSVParameter( default=("r",), @@ -56,38 +57,50 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor run_command_in_tmp = True def create_branch_map(self): - return [""] # single branch with empty data + # single branch with empty data + return [None] def workflow_requires(self): reqs = super(FitDiagnostics, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] 
= Snapshot.req(self, branch=0) + return reqs def output(self): - parts = [self.get_output_postfix()] + parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if not self.skip_b_only: parts.append("withBOnly") if self.skip_save: - parts.append(map("no{}".format, sorted(self.skip_save))) - postfix = self.join_postfix(parts) + parts.append(map("not{}".format, sorted(self.skip_save))) + name = lambda prefix: self.join_postfix([prefix, self.get_output_postfix(), parts]) return { - "result": self.local_target("result__{}.root".format(postfix)), - "diagnostics": self.local_target("fitdiagnostics__{}.root".format(postfix)), + "result": self.local_target(name("result") + ".root"), + "diagnostics": self.local_target(name("fitdiagnostics") + ".root"), } - @property - def blinded_args(self): - if self.unblinded: - return "" + def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" else: - return "--toys {self.toys}".format(self=self) + workspace = self.input()["workspace"].path + snapshot_args = "" - def build_command(self): - outputs = self.output() + # arguments for un/blinding + blinded_args = "" + if self.blinded: + blinded_args = "--toys {self.toys}".format(self=self) # prepare optional flags flags = [] @@ -97,18 +110,20 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor if save_flag not in self.skip_save: flags.append("--save{}".format(save_flag)) + outputs = self.output() return ( "combine -M FitDiagnostics {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --redefineSignalPOIs {self.joined_pois}" " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" " {flags}" + " 
{snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.FitDiagnostics.mH{self.mass_int}{postfix}.root {output_result}" @@ -116,10 +131,12 @@ class FitDiagnostics(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWor "mv fitDiagnosticsTest.root {output_diagnostics}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, postfix="" if SAVEFLAGS.Toys in self.skip_save else ".123456", output_result=outputs["result"].path, output_diagnostics=outputs["diagnostics"].path, + blinded_args=blinded_args, + snapshot_args=snapshot_args, flags=" ".join(flags), ) diff --git a/dhi/tasks/pulls_impacts.py b/dhi/tasks/pulls_impacts.py index 587de2d6..06684165 100644 --- a/dhi/tasks/pulls_impacts.py +++ b/dhi/tasks/pulls_impacts.py @@ -12,10 +12,11 @@ import numpy as np from dhi.tasks.base import HTCondorWorkflow, BoxPlotTask, view_output_plots from dhi.tasks.combine import CombineCommandTask, POITask, POIPlotTask, CreateWorkspace +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.datacard_tools import get_workspace_parameters -class PullsAndImpactsBase(POITask): +class PullsAndImpactsBase(POITask, SnapshotUser): only_parameters = law.CSVParameter( default=(), @@ -31,11 +32,6 @@ class PullsAndImpactsBase(POITask): description="when True, calculate pulls and impacts for MC stats nuisances as well; " "default: False", ) - use_snapshot = luigi.BoolParameter( - default=False, - description="when True, run the initial fit first and use it as a snapshot for nuisance " - "fits; default: False", - ) mc_stats_patterns = ["*prop_bin*"] @@ -50,13 +46,6 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow def __init__(self, *args, **kwargs): super(PullsAndImpacts, self).__init__(*args, **kwargs) - # encourage using snapshots when running unblinded - if self.unblinded and not self.use_snapshot and self.is_workflow() and self.branches != (0,): - self.logger.info_once("unblinded_no_snapshot", "you are 
running unblinded pulls and " - "impacts without using the initial fit as a snapshot for nuisance fits; you might " - "consider to do so by adding '--use-snapshot' to your command as this can lead to " - "more stable results and faster fits") - self._cache_branches = False def create_branch_map(self): @@ -100,51 +89,44 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow def workflow_requires(self): reqs = super(PullsAndImpacts, self).workflow_requires() reqs["workspace"] = CreateWorkspace.req(self) - if self.use_snapshot and set(self.branch_map) != {0}: - reqs["snapshot"] = self.req(self, branches=(0,)) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): reqs = {"workspace": CreateWorkspace.req(self)} - if self.branch > 0 and self.use_snapshot: - reqs["snapshot"] = self.req(self, branch=0) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) return reqs def output(self): parts = [self.branch_data] - if self.branch > 0 and self.use_snapshot: + if self.use_snapshot: parts.append("fromsnapshot") - name = lambda s: self.join_postfix([s, self.get_output_postfix()] + parts) + ".root" - - outputs = {"fit": self.local_target(name("fit"))} - if self.branch == 0: - outputs["fitresult"] = self.local_target(name("fitresult"), optional=True) - return outputs - - @property - def blinded_args(self): - if self.unblinded: - return "--seed {self.branch}".format(self=self) - else: - return "--seed {self.branch} --toys {self.toys}".format(self=self) + name = self.join_postfix(["fit", self.get_output_postfix(), parts]) + ".root" + return self.local_target(name) def build_command(self): - # check if a snapshot is used - use_snapshot = self.use_snapshot and self.branch > 0 - - # get the workspace to use - if use_snapshot: - workspace = self.input()["snapshot"]["fit"].path + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path 
+ snapshot_args = " --snapshotName MultiDimFit" else: workspace = self.input()["workspace"].path + snapshot_args = "" + + # args for blinding / unblinding + if self.unblinded: + blinded_args = "--seed {self.branch}".format(self=self) + else: + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) # define branch dependent options if self.branch == 0: # initial fit branch_opts = ( " --algo singles" - " --saveWorkspace" " --saveFitResult" ) else: @@ -155,17 +137,14 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow " --floatOtherPOIs 1" " --saveInactivePOI 1" ).format(self.branch_data) - if use_snapshot: - branch_opts += " --snapshotName MultiDimFit" # define the basic command - outputs = self.output() cmd = ( "combine -M MultiDimFit {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --redefineSignalPOIs {self.pois[0]}" " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" @@ -173,6 +152,7 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow " --freezeNuisanceGroups {self.joined_frozen_groups}" " --saveNLL" " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" + " {snapshot_args}" " {self.combine_optimization_args}" " {branch_opts}" " && " @@ -180,14 +160,12 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow ).format( self=self, workspace=workspace, - output=outputs["fit"].path, + output=self.output().path, + snapshot_args=snapshot_args, + blinded_args=blinded_args, branch_opts=branch_opts, ) - # for the initial fit, also move the fitresult - if self.branch == 0: - cmd += " && mv multidimfitTest.root {}".format(outputs["fitresult"].path) - return cmd def htcondor_output_postfix(self): @@ -241,7 +219,7 @@ class MergePullsAndImpacts(PullsAndImpactsBase): fit_results = {} fail_info = [] for b, name in branch_map.items(): - inp = inputs[b]["fit"] + inp 
= inputs[b] if not inp.exists(): self.logger.warning("input of branch {} at {} does not exist".format(b, inp.path)) continue diff --git a/dhi/tasks/significances.py b/dhi/tasks/significances.py index 5d42c244..afc31dd3 100644 --- a/dhi/tasks/significances.py +++ b/dhi/tasks/significances.py @@ -15,10 +15,11 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.util import unique_recarray -class SignificanceBase(POIScanTask): +class SignificanceBase(POIScanTask, SnapshotUser): force_scan_parameters_unequal_pois = True allow_parameter_values_in_pois = True @@ -56,45 +57,68 @@ class SignificanceScan(SignificanceBase, CombineCommandTask, law.LocalWorkflow, def workflow_requires(self): reqs = super(SignificanceScan, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): - return self.local_target("significance__" + self.get_output_postfix() + ".root") + parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") + + name = self.join_postfix(["significance", self.get_output_postfix(), parts]) + ".root" + return self.local_target(name) + + def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" - @property - def blinded_args(self): + # arguments for un/blinding if self.unblinded: - return "--seed {self.branch}".format(self=self) + blinded_args = "--seed {self.branch}".format(self=self) elif self.frequentist_toys: - return "--seed 
{self.branch} --toys {self.toys} --toysFreq".format(self=self) + blinded_args = "--seed {self.branch} --toys {self.toys} --toysFreq".format(self=self) else: - return "--seed {self.branch} --toys {self.toys}".format(self=self) + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) - def build_command(self): - return ( + # build the command + cmd = ( "combine -M Significance {workspace}" " {self.custom_args}" " --verbose 1" " --mass {self.mass}" - " {self.blinded_args}" + " {blinded_args}" " --redefineSignalPOIs {self.joined_pois}" " --setParameterRanges {self.joined_parameter_ranges}" " --setParameters {self.joined_scan_values},{self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.Significance.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - workspace=self.input().path, + workspace=workspace, output=self.output().path, + blinded_args=blinded_args, + snapshot_args=snapshot_args, ) + return cmd + class MergeSignificanceScan(SignificanceBase): @@ -102,7 +126,12 @@ class MergeSignificanceScan(SignificanceBase): return SignificanceScan.req(self) def output(self): - return self.local_target("significance__{}.npz".format(self.get_output_postfix())) + parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") + + name = self.join_postfix(["significance", self.get_output_postfix(), parts]) + ".npz" + return self.local_target(name) @law.decorator.log def run(self): @@ -178,6 +207,8 @@ class PlotSignificanceScan(SignificanceBase, POIPlotTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + parts.append("fromsnapshot") if self.y_log: parts.append("log") @@ -268,6 +299,8 @@ class PlotMultipleSignificanceScans(PlotSignificanceScan, MultiDatacardTask): def output(self): # additional postfix parts = [] + if self.use_snapshot: + 
parts.append("fromsnapshot") if self.y_log: parts.append("log") diff --git a/dhi/tasks/snapshot.py b/dhi/tasks/snapshot.py new file mode 100644 index 00000000..fcdd5a15 --- /dev/null +++ b/dhi/tasks/snapshot.py @@ -0,0 +1,104 @@ +# coding: utf-8 + +""" +Tasks related to defining and using snapshots. +""" + +import copy + +import law +import luigi +import six + +from dhi.tasks.base import HTCondorWorkflow +from dhi.tasks.combine import ( + CombineCommandTask, + POITask, + CreateWorkspace, +) + + +class Snapshot(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): + + pois = copy.copy(POITask.pois) + pois._default = ("r",) + + force_scan_parameters_equal_pois = True + allow_parameter_values_in_pois = True + run_command_in_tmp = True + + @classmethod + def req_params(cls, *args, **kwargs): + prefer_cli = kwargs.get("_prefer_cli", None) + if isinstance(prefer_cli, six.string_types): + prefer_cli = {prefer_cli} + else: + prefer_cli = {prefer_cli} if prefer_cli else set() + + # prefer all kinds of parameters from the command line + prefer_cli |= { + "toys", "frozen_parameters", "frozen_groups", "minimizer", "parameter_values", + "parameter_ranges", "workflow", "max_runtime", + } + + kwargs["_prefer_cli"] = prefer_cli + return super(Snapshot, cls).req_params(*args, **kwargs) + + def create_branch_map(self): + # single branch that does not need special data + return [None] + + def workflow_requires(self): + reqs = super(Snapshot, self).workflow_requires() + reqs["workspace"] = self.requires_from_branch() + return reqs + + def requires(self): + return CreateWorkspace.req(self) + + def output(self): + name = self.join_postfix(["snapshot", self.get_output_postfix()]) + ".root" + return self.local_target(name) + + def build_command(self): + # args for blinding / unblinding + if self.unblinded: + blinded_args = "--seed {self.branch}".format(self=self) + else: + blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) + + # build the command + cmd 
= ( + "combine -M MultiDimFit {workspace}" + " {self.custom_args}" + " --verbose 1" + " --mass {self.mass}" + " {blinded_args}" + " --redefineSignalPOIs {self.joined_pois}" + " --setParameters {self.joined_parameter_values}" + " --setParameterRanges {self.joined_parameter_ranges}" + " --freezeParameters {self.joined_frozen_parameters}" + " --freezeNuisanceGroups {self.joined_frozen_groups}" + " --saveWorkspace" + " --saveNLL" + " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" + " {self.combine_optimization_args}" + " && " + "mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}" + ).format( + self=self, + workspace=self.input().path, + output=self.output().path, + blinded_args=blinded_args, + ) + + return cmd + + +class SnapshotUser(object): + + use_snapshot = luigi.BoolParameter( + default=False, + description="when set, perform a MultiDimFit first and use its workspace as a snapshot; " + "default: False", + ) -- GitLab From 501362f3c8a3076d70f4f314ee13802937055f57 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Tue, 31 Aug 2021 11:27:31 +0200 Subject: [PATCH 11/21] Improve highlighting of long commands. --- dhi/tasks/base.py | 23 +++++++++++++++++++---- dhi/tasks/combine.py | 23 +++++++++++++++++++---- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/dhi/tasks/base.py b/dhi/tasks/base.py index 76d0948b..328b95f7 100644 --- a/dhi/tasks/base.py +++ b/dhi/tasks/base.py @@ -60,8 +60,16 @@ class BaseTask(law.Task): cmd = dep.get_command() if cmd: - cmd = law.util.quote_cmd(cmd) if isinstance(cmd, (list, tuple)) else cmd - text += law.util.colored(cmd, "cyan") + # when cmd is a 2-tuple, i.e. 
the real command and a representation for printing + # pick the second one + if isinstance(cmd, tuple) and len(cmd) == 2: + cmd = cmd[1] + else: + if isinstance(cmd, list): + cmd = law.util.quote_cmd(cmd) + # defaut highlighting + cmd = law.util.colored(cmd, "cyan") + text += cmd else: text += law.util.colored("empty", "red") print(offset + text) @@ -452,10 +460,14 @@ class CommandTask(AnalysisTask): parent.touch() handled_parent_uris.add(parent.uri()) - def run_command(self, cmd, optional=False, **kwargs): + def run_command(self, cmd, highlighted_cmd=None, optional=False, **kwargs): # proper command encoding cmd = (law.util.quote_cmd(cmd) if isinstance(cmd, (list, tuple)) else cmd).strip() + # default highlighted command + if not highlighted_cmd: + highlighted_cmd = law.util.colored(cmd, "cyan") + # when no cwd was set and run_command_in_tmp is True, create a tmp dir if "cwd" not in kwargs and self.run_command_in_tmp: tmp_dir = law.LocalDirectoryTarget(is_tmp=True) @@ -464,7 +476,7 @@ class CommandTask(AnalysisTask): self.publish_message("cwd: {}".format(kwargs.get("cwd", os.getcwd()))) # call it - with self.publish_step("running '{}' ...".format(law.util.colored(cmd, "cyan"))): + with self.publish_step("running '{}' ...".format(highlighted_cmd)): p, lines = law.util.readable_popen(cmd, shell=True, executable="/bin/bash", **kwargs) for line in lines: print(line) @@ -487,6 +499,9 @@ class CommandTask(AnalysisTask): # get the command cmd = self.get_command() + if isinstance(cmd, tuple) and len(cmd) == 2: + kwargs["highlighted_cmd"] = cmd[1] + cmd = cmd[0] # run it self.run_command(cmd, **kwargs) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index 80d559ad..319e4be0 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -1412,10 +1412,18 @@ class CombineCommandTask(CommandTask): def is_opt(s): return s.startswith("-") and not is_number(s[1:]) + # highlighting + cyan = lambda s: law.util.colored(s, "cyan") + cyan_bright = lambda s: 
law.util.colored(s, "cyan", style="bright") + bright = lambda s: law.util.colored(s, "default", style="bright") + # extract the "combine ..." part - cmds = [c.strip() for c in cmd.split(" && ")] - for i, c in enumerate(list(cmds)): + cmds = [] + highlighted_cmds = [] + for c in (c.strip() for c in cmd.split(" && ")): if not c.startswith("combine "): + cmds.append(c) + highlighted_cmds.append(cyan(c)) continue # first, replace aliases @@ -1455,9 +1463,16 @@ class CombineCommandTask(CommandTask): ] # build the full command again - cmds[i] = law.util.quote_cmd(["combine"] + leading_values + law.util.flatten(params)) + cmds.append(law.util.quote_cmd(["combine"] + leading_values + law.util.flatten(params))) + + # add the highlighted command + highlighted_cmd = [] + for i, p in enumerate(shlex.split(cmds[-1])): + func = cyan_bright if (i == 0 and p == "combine") or p.startswith("--") else cyan + highlighted_cmd.append(func(p)) + highlighted_cmds.append(" ".join(highlighted_cmd)) - return " && ".join(cmds) + return " && ".join(cmds), bright(" && ").join(highlighted_cmds) class InputDatacards(DatacardTask, law.ExternalTask): -- GitLab From 96142016ddccbe691be7b8d1ddde08b8c5257b79 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Wed, 1 Sep 2021 17:25:18 +0200 Subject: [PATCH 12/21] Fix PlotUpperLimitsAtPoint. 
--- dhi/tasks/limits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index b8fd6bb7..abc189ae 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -602,7 +602,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): ) -class PlotUpperLimitsAtPoint(POIPlotTask, MultiDatacardTask, BoxPlotTask): +class PlotUpperLimitsAtPoint(POIPlotTask, SnapshotUser, MultiDatacardTask, BoxPlotTask): xsec = PlotUpperLimits.xsec br = PlotUpperLimits.br -- GitLab From 705f0c3812c647cb9f4f316d54dc4a230c652e88 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Thu, 2 Sep 2021 14:49:56 +0200 Subject: [PATCH 13/21] Fix parameter value forwarding in PlotUpperLimitsAtPoint. --- dhi/tasks/limits.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index abc189ae..6c3867d3 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -20,7 +20,7 @@ from dhi.tasks.combine import ( ) from dhi.tasks.snapshot import Snapshot, SnapshotUser from dhi.util import unique_recarray, real_path -from dhi.config import br_hh +from dhi.config import br_hh, poi_data class UpperLimitsBase(POIScanTask, SnapshotUser): @@ -708,12 +708,14 @@ class PlotUpperLimitsAtPoint(POIPlotTask, SnapshotUser, MultiDatacardTask, BoxPl return self._external_limit_values def requires(self): - scan_parameter_value = self.parameter_values_dict.get(self.pseudo_scan_parameter, 1.0) - scan_parameter = (self.pseudo_scan_parameter, scan_parameter_value, scan_parameter_value, 1) + default = poi_data.get(self.pseudo_scan_parameter, {}).get("sm_value", 1.0) + value = self.parameter_values_dict.get(self.pseudo_scan_parameter, default) + scan_parameter = (self.pseudo_scan_parameter, value, value, 1) parameter_values = tuple( - pv for pv in self.parameter_values - if not pv.startswith(self.pseudo_scan_parameter + "=") + (p, v) for p, v in 
self.parameter_values_dict.items() + if p != self.pseudo_scan_parameter ) + return [ UpperLimits.req( self, -- GitLab From 60cf7ece8cb4a91ef263ed394113c3d5d96f7a53 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 3 Sep 2021 12:45:47 +0200 Subject: [PATCH 14/21] Simplify usage of snapshots. --- dhi/tasks/exclusion.py | 21 ++++++++++++-- dhi/tasks/gof.py | 58 +++++++++++++++++++++++++++----------- dhi/tasks/likelihoods.py | 26 +++++++---------- dhi/tasks/limits.py | 38 ++++++++++++------------- dhi/tasks/postfit.py | 25 +++++++++++++--- dhi/tasks/pulls_impacts.py | 15 +++++----- dhi/tasks/significances.py | 18 +++--------- 7 files changed, 121 insertions(+), 80 deletions(-) diff --git a/dhi/tasks/exclusion.py b/dhi/tasks/exclusion.py index 59a380d5..5ec3d928 100644 --- a/dhi/tasks/exclusion.py +++ b/dhi/tasks/exclusion.py @@ -11,12 +11,13 @@ import luigi from dhi.tasks.base import BoxPlotTask, view_output_plots from dhi.tasks.combine import MultiDatacardTask, POIScanTask, POIPlotTask +from dhi.tasks.snapshot import SnapshotUser from dhi.tasks.limits import MergeUpperLimits, PlotUpperLimits from dhi.tasks.likelihoods import MergeLikelihoodScan, PlotLikelihoodScan from dhi.config import br_hh -class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, BoxPlotTask): +class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, SnapshotUser, BoxPlotTask): show_best_fit = PlotLikelihoodScan.show_best_fit show_best_fit_error = PlotLikelihoodScan.show_best_fit_error @@ -55,6 +56,14 @@ class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, BoxPl return reqs + def get_output_postfix(self, join=True): + parts = super(PlotExclusionAndBestFit, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + def output(self): names = self.create_plot_names(["exclusionbestfit", self.get_output_postfix()]) return 
[self.local_target(name) for name in names] @@ -129,7 +138,7 @@ class PlotExclusionAndBestFit(POIScanTask, MultiDatacardTask, POIPlotTask, BoxPl ) -class PlotExclusionAndBestFit2D(POIScanTask, POIPlotTask): +class PlotExclusionAndBestFit2D(POIScanTask, POIPlotTask, SnapshotUser): show_best_fit = PlotLikelihoodScan.show_best_fit show_best_fit_error = copy.copy(PlotLikelihoodScan.show_best_fit_error) @@ -190,6 +199,14 @@ class PlotExclusionAndBestFit2D(POIScanTask, POIPlotTask): return reqs + def get_output_postfix(self, join=True): + parts = super(PlotExclusionAndBestFit2D, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + def output(self): names = self.create_plot_names(["exclusionbestfit2d", self.get_output_postfix()]) return [self.local_target(name) for name in names] diff --git a/dhi/tasks/gof.py b/dhi/tasks/gof.py index e06b09b2..7ddf9e18 100644 --- a/dhi/tasks/gof.py +++ b/dhi/tasks/gof.py @@ -15,9 +15,10 @@ from dhi.tasks.combine import ( POIPlotTask, CreateWorkspace, ) +from dhi.tasks.snapshot import Snapshot, SnapshotUser -class GoodnessOfFitBase(POITask): +class GoodnessOfFitBase(POITask, SnapshotUser): toys = luigi.IntParameter( default=1, @@ -49,11 +50,19 @@ class GoodnessOfFitBase(POITask): parts["gof"] = self.algorithm return parts + def get_output_postfix(self, join=True): + parts = super(GoodnessOfFitBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + if self.frequentist_toys: + parts.append("freqtoys") + + return self.join_postfix(parts) if join else parts + @property def toys_postfix(self): - postfix = "t{}_pt{}".format(self.toys, self.toys_per_task) - if self.frequentist_toys: - postfix += "_freq" + return "t{}_pt{}".format(self.toys, self.toys_per_task) return postfix @@ -82,23 +91,36 @@ class GoodnessOfFit(GoodnessOfFitBase, CombineCommandTask, law.LocalWorkflow, HT def workflow_requires(self): 
reqs = super(GoodnessOfFit, self).workflow_requires() - reqs["workspace"] = self.requires_from_branch() + reqs["workspace"] = CreateWorkspace.req(self) + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self) return reqs def requires(self): - return CreateWorkspace.req(self) + reqs = {"workspace": CreateWorkspace.req(self)} + if self.use_snapshot: + reqs["snapshot"] = Snapshot.req(self, branch=0) + return reqs def output(self): + parts = [] if self.branch == 0: - postfix = "b0_data" + parts.append("b0_data") else: - postfix = "b{}_toy{}To{}".format(self.branch, self.branch_data[0], self.branch_data[-1]) - if self.frequentist_toys: - postfix += "_freq" - name = self.join_postfix(["gof", self.get_output_postfix(), postfix]) + parts.append("b{}_toy{}To{}".format(self.branch, self.branch_data[0], self.branch_data[-1])) + + name = self.join_postfix(["gof", self.get_output_postfix(), parts]) return self.local_target(name + ".root") def build_command(self): + # get the workspace to use and define snapshot args + if self.use_snapshot: + workspace = self.input()["snapshot"].path + snapshot_args = " --snapshotName MultiDimFit" + else: + workspace = self.input()["workspace"].path + snapshot_args = "" + # toy options toy_opts = "" if self.branch > 0: @@ -106,7 +128,8 @@ class GoodnessOfFit(GoodnessOfFitBase, CombineCommandTask, law.LocalWorkflow, HT if self.frequentist_toys: toy_opts += " --toysFrequentist" - return ( + # build the command + cmd = ( "combine -M GoodnessOfFit {workspace}" " {self.custom_args}" " --verbose 1" @@ -119,16 +142,20 @@ class GoodnessOfFit(GoodnessOfFitBase, CombineCommandTask, law.LocalWorkflow, HT " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" + " {snapshot_args}" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.GoodnessOfFit.mH{self.mass_int}.{self.branch}.root {output}" ).format( self=self, - 
workspace=self.input().path, + workspace=workspace, output=self.output().path, toy_opts=toy_opts, + snapshot_args=snapshot_args, ) + return cmd + def htcondor_output_postfix(self): postfix = super(GoodnessOfFit, self).htcondor_output_postfix() return "{}__{}".format(postfix, self.toys_postfix) @@ -257,10 +284,7 @@ class PlotMultipleGoodnessOfFits(PlotGoodnessOfFit, MultiDatacardTask, BoxPlotTa @property def toys_postfix(self): tpl_to_str = lambda tpl: "_".join(map(str, tpl)) - postfix = "t{}_pt{}".format(tpl_to_str(self.toys), tpl_to_str(self.toys_per_task)) - if self.frequentist_toys: - postfix += "_freq" - return postfix + return "t{}_pt{}".format(tpl_to_str(self.toys), tpl_to_str(self.toys_per_task)) def requires(self): return [ diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index c4cb1192..f20b398b 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -32,6 +32,14 @@ class LikelihoodBase(POIScanTask, SnapshotUser): force_scan_parameters_equal_pois = True allow_parameter_values_in_pois = True + def get_output_postfix(self, join=True): + parts = super(LikelihoodBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): @@ -54,11 +62,7 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo return reqs def output(self): - parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") - - name = self.join_postfix(["likelihood", self.get_output_postfix(), parts]) + ".root" + name = self.join_postfix(["likelihood", self.get_output_postfix()]) + ".root" return self.local_target(name) def build_command(self): @@ -117,11 +121,7 @@ class MergeLikelihoodScan(LikelihoodBase): return LikelihoodScan.req(self) def output(self): - parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") - - name = 
self.join_postfix(["likelihoods", self.get_output_postfix(), parts]) + ".npz" + name = self.join_postfix(["likelihoods", self.get_output_postfix()]) + ".npz" return self.local_target(name) @law.decorator.log @@ -245,8 +245,6 @@ class PlotLikelihoodScan(LikelihoodBase, POIPlotTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.n_pois == 1 and self.y_log: parts.append("log") @@ -481,8 +479,6 @@ class PlotMultipleLikelihoodScans(PlotLikelihoodScan, MultiDatacardTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.n_pois == 1 and self.y_log: parts.append("log") @@ -596,8 +592,6 @@ class PlotMultipleLikelihoodScansByModel(PlotLikelihoodScan, MultiHHModelTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.n_pois == 1 and self.y_log: parts.append("log") diff --git a/dhi/tasks/limits.py b/dhi/tasks/limits.py index 6c3867d3..c0b68014 100644 --- a/dhi/tasks/limits.py +++ b/dhi/tasks/limits.py @@ -27,6 +27,14 @@ class UpperLimitsBase(POIScanTask, SnapshotUser): force_scan_parameters_unequal_pois = True + def get_output_postfix(self, join=True): + parts = super(UpperLimitsBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow): @@ -49,11 +57,7 @@ class UpperLimits(UpperLimitsBase, CombineCommandTask, law.LocalWorkflow, HTCond return reqs def output(self): - parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") - - name = self.join_postfix(["limit", self.get_output_postfix(), parts]) + ".root" + name = self.join_postfix(["limit", self.get_output_postfix()]) + ".root" return self.local_target(name) def build_command(self): @@ -132,11 +136,7 @@ class 
MergeUpperLimits(UpperLimitsBase): return UpperLimits.req(self) def output(self): - parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") - - name = self.join_postfix(["limits", self.get_output_postfix(), parts]) + ".npz" + name = self.join_postfix(["limits", self.get_output_postfix()]) + ".npz" return self.local_target(name) @law.decorator.log @@ -231,8 +231,6 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -376,8 +374,6 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -494,8 +490,6 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -726,11 +720,17 @@ class PlotUpperLimitsAtPoint(POIPlotTask, SnapshotUser, MultiDatacardTask, BoxPl for datacards in self.multi_datacards ] + def get_output_postfix(self, join=True): + parts = super(PlotUpperLimitsAtPoint, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.xsec in ["pb", "fb"]: parts.append(self.xsec) if self.br != law.NO_STR: @@ -888,8 +888,6 @@ class PlotUpperLimits2D(UpperLimitsBase, POIPlotTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.z_log: parts.append("log") diff --git a/dhi/tasks/postfit.py 
b/dhi/tasks/postfit.py index a25941f4..5c58ac22 100644 --- a/dhi/tasks/postfit.py +++ b/dhi/tasks/postfit.py @@ -73,10 +73,16 @@ class FitDiagnostics(POITask, CombineCommandTask, SnapshotUser, law.LocalWorkflo reqs["snapshot"] = Snapshot.req(self, branch=0) return reqs - def output(self): - parts = [] + def get_output_postfix(self, join=True): + parts = super(FitDiagnostics, self).get_output_postfix(join=False) + if self.use_snapshot: parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + + def output(self): + parts = [] if not self.skip_b_only: parts.append("withBOnly") if self.skip_save: @@ -141,7 +147,18 @@ class FitDiagnostics(POITask, CombineCommandTask, SnapshotUser, law.LocalWorkflo ) -class PlotPostfitSOverB(POIPlotTask): +class PostfitPlotBase(POIPlotTask, SnapshotUser): + + def get_output_postfix(self, join=True): + parts = super(PostfitPlotBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + + +class PlotPostfitSOverB(PostfitPlotBase): pois = FitDiagnostics.pois bins = law.CSVParameter( @@ -262,7 +279,7 @@ class PlotPostfitSOverB(POIPlotTask): ) -class PlotNuisanceLikelihoodScans(POIPlotTask): +class PlotNuisanceLikelihoodScans(PostfitPlotBase): x_min = copy.copy(POIPlotTask.x_min) x_max = copy.copy(POIPlotTask.x_max) diff --git a/dhi/tasks/pulls_impacts.py b/dhi/tasks/pulls_impacts.py index 06684165..2186f20e 100644 --- a/dhi/tasks/pulls_impacts.py +++ b/dhi/tasks/pulls_impacts.py @@ -38,6 +38,14 @@ class PullsAndImpactsBase(POITask, SnapshotUser): force_n_pois = 1 allow_parameter_values_in_pois = True + def get_output_postfix(self, join=True): + parts = super(PullsAndImpactsBase, self).get_output_postfix(join=False) + + if self.use_snapshot: + parts.append("fromsnapshot") + + return self.join_postfix(parts) if join else parts + class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow, 
HTCondorWorkflow): @@ -101,9 +109,6 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow def output(self): parts = [self.branch_data] - if self.use_snapshot: - parts.append("fromsnapshot") - name = self.join_postfix(["fit", self.get_output_postfix(), parts]) + ".root" return self.local_target(name) @@ -196,8 +201,6 @@ class MergePullsAndImpacts(PullsAndImpactsBase): parts = [] if self.mc_stats: parts.append("mcstats") - if self.use_snapshot: - parts.append("fromsnapshot") if self.only_parameters: parts.append("only_" + law.util.create_hash(sorted(self.only_parameters))) if self.skip_parameters: @@ -394,8 +397,6 @@ class PlotPullsAndImpacts(PullsAndImpactsBase, POIPlotTask, BoxPlotTask): parts = [] if self.mc_stats: parts.append("mcstats") - if self.use_snapshot: - parts.append("fromsnapshot") if self.only_parameters: parts.append("only_" + law.util.create_hash(sorted(self.only_parameters))) if self.skip_parameters: diff --git a/dhi/tasks/significances.py b/dhi/tasks/significances.py index afc31dd3..90967657 100644 --- a/dhi/tasks/significances.py +++ b/dhi/tasks/significances.py @@ -44,6 +44,8 @@ class SignificanceBase(POIScanTask, SnapshotUser): if not self.unblinded and self.frequentist_toys: parts.insert(0, ["postfit"]) + if self.use_snapshot: + parts.append("fromsnapshot") return self.join_postfix(parts) if join else parts @@ -69,11 +71,7 @@ class SignificanceScan(SignificanceBase, CombineCommandTask, law.LocalWorkflow, return reqs def output(self): - parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") - - name = self.join_postfix(["significance", self.get_output_postfix(), parts]) + ".root" + name = self.join_postfix(["significance", self.get_output_postfix()]) + ".root" return self.local_target(name) def build_command(self): @@ -126,11 +124,7 @@ class MergeSignificanceScan(SignificanceBase): return SignificanceScan.req(self) def output(self): - parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") - - 
name = self.join_postfix(["significance", self.get_output_postfix(), parts]) + ".npz" + name = self.join_postfix(["significance", self.get_output_postfix()]) + ".npz" return self.local_target(name) @law.decorator.log @@ -207,8 +201,6 @@ class PlotSignificanceScan(SignificanceBase, POIPlotTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.y_log: parts.append("log") @@ -299,8 +291,6 @@ class PlotMultipleSignificanceScans(PlotSignificanceScan, MultiDatacardTask): def output(self): # additional postfix parts = [] - if self.use_snapshot: - parts.append("fromsnapshot") if self.y_log: parts.append("log") -- GitLab From 14a8cfe300b406ed99c69063a1aaa44b5c55a3eb Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 3 Sep 2021 15:44:29 +0200 Subject: [PATCH 15/21] Artificially extend ranges in nll scans to include expected best fits. --- dhi/config.py | 2 +- dhi/tasks/likelihoods.py | 49 +++++++++++++++++++++++++++++++++++----- setup.sh | 8 +++---- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/dhi/config.py b/dhi/config.py index 33562128..698ac305 100644 --- a/dhi/config.py +++ b/dhi/config.py @@ -159,7 +159,7 @@ colors = DotDict( # color sequence for plots with multiple elements color_sequence = ["blue", "red", "green", "grey", "pink", "cyan", "orange", "light_green", "yellow"] -color_sequence += 10 * ["black"] +color_sequence += 10 * ["grey"] # marker sequence for plots with multiple elements marker_sequence = [20, 21, 22, 23, 24, 25, 26, 32, 27, 33, 28, 34, 29, 30] diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index f20b398b..147077b4 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -21,7 +21,8 @@ from dhi.tasks.combine import ( CreateWorkspace, ) from dhi.tasks.snapshot import Snapshot, SnapshotUser -from dhi.util import unique_recarray, extend_recarray, convert_dnll2 +from dhi.config import poi_data +from dhi.util import 
unique_recarray, extend_recarray, convert_dnll2, linspace class LikelihoodBase(POIScanTask, SnapshotUser): @@ -80,6 +81,40 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo else: blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self) + # ensure that ranges of scanned parameters contain their SM values (plus one step) + # in order for the likelihood scan to find the expected minimum and compute all deltaNLL + # values with respect to that minimum; otherwise, results of different scans cannot be stitched + # together as they were potentially compared against different minima; this could be + # simplified by https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/pull/686 which + # would only require to set "--setParameterRangesForGrid {self.joined_scan_ranges}" + ext_ranges = [] + for p, ranges in self.scan_parameters_dict.items(): + # gather data + start, stop, points = ranges[0] + sm_value = poi_data.get(p, {}).get("sm_value", 1.) + step_size = (float(stop - start) / (points - 1)) if points > 1 else 1 + assert step_size > 0 + # decrease the starting point until the sm value is fully contained + while start >= sm_value: + start -= step_size + points += 1 + # increase the stopping point until the sm value is fully contained + while stop <= sm_value: + stop += step_size + points += 1 + # store the extended range + ext_ranges.append((start, stop, points)) + # compute the new n-D point space + ext_space = self._get_scan_linspace(ext_ranges) + # get the point index of the current branch + ext_point = ext_space.index(self.branch_data) + # recreate joined expressions for the combine command + ext_joined_scan_points = ",".join(map(str, (p for _, _, p in ext_ranges))) + ext_joined_scan_ranges = ":".join( + "{}={},{}".format(name, start, stop) + for name, (start, stop, _) in zip(self.scan_parameter_names, ext_ranges) + ) + # build the command cmd = ( "combine -M MultiDimFit {workspace}" @@ -89,12 +124,11 @@ class
LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo " {blinded_args}" " --algo grid" " --redefineSignalPOIs {self.joined_pois}" - " --setParameterRangesForGrid {self.joined_scan_ranges}" - " --gridPoints {self.joined_scan_points}" - " --firstPoint {self.branch}" - " --lastPoint {self.branch}" + " --gridPoints {ext_joined_scan_points}" + " --firstPoint {ext_point}" + " --lastPoint {ext_point}" " --alignEdges 1" - " --setParameterRanges {self.joined_parameter_ranges}" + " --setParameterRanges {ext_joined_scan_ranges}:{self.joined_parameter_ranges}" " --setParameters {self.joined_parameter_values}" " --freezeParameters {self.joined_frozen_parameters}" " --freezeNuisanceGroups {self.joined_frozen_groups}" @@ -110,6 +144,9 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo output=self.output().path, blinded_args=blinded_args, snapshot_args=snapshot_args, + ext_joined_scan_points=ext_joined_scan_points, + ext_joined_scan_ranges=ext_joined_scan_ranges, + ext_point=ext_point, ) return cmd diff --git a/setup.sh b/setup.sh index 8dea7e01..69b61eb7 100644 --- a/setup.sh +++ b/setup.sh @@ -90,10 +90,10 @@ setup() { cd "$CMSSW_VERSION/src" && \ eval "$( scramv1 runtime -sh )" && \ scram b && \ - git clone --depth 1 --branch parameter_ranges_for_grid https://github.com/riga/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ + git clone --depth 1 https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ cd HiggsAnalysis/CombinedLimit && \ git fetch --tags && \ - git checkout parameter_ranges_for_grid && \ + git checkout tags/v8.2.0 && \ chmod ug+x test/diffNuisances.py && \ scram b -j ${DHI_INSTALL_CORES} ) || return "$?" 
@@ -103,10 +103,10 @@ setup() { echo "installing standalone combine at $DHI_SOFTWARE/HiggsAnalysis/CombinedLimit" cd "$DHI_SOFTWARE" rm -rf HiggsAnalysis/CombinedLimit - git clone --depth 1 --branch parameter_ranges_for_grid https://github.com/riga/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ + git clone --depth 1 https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit && \ cd HiggsAnalysis/CombinedLimit && \ git fetch --tags && \ - git checkout parameter_ranges_for_grid && \ + git checkout tags/v8.2.0 && \ chmod ug+x test/diffNuisances.py && \ source env_standalone.sh "" && \ make -j ${DHI_INSTALL_CORES} && \ -- GitLab From 604bfa40fec8761ad1a68ef5dcd53aa6b8fa53ce Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Fri, 3 Sep 2021 18:58:39 +0200 Subject: [PATCH 16/21] Refactor postfit nuisance likelihood scans. --- dhi/plots/likelihoods.py | 41 +++++++++++++++++++++++++-------- dhi/plots/pulls_impacts.py | 42 +++------------------------------- dhi/plots/util.py | 47 +++++++++++++++++++++++++++++++++++++- dhi/tasks/postfit.py | 18 ++++++++++++++- 4 files changed, 97 insertions(+), 51 deletions(-) diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index 8a13fb3d..638f442d 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -21,7 +21,7 @@ from dhi.util import ( ) from dhi.plots.util import ( use_style, create_model_parameters, fill_hist_from_points, get_contours, get_y_range, - infer_binning_from_grid, get_contour_box, + infer_binning_from_grid, get_contour_box, make_parameter_label_map, ) @@ -817,6 +817,8 @@ def plot_nuisance_likelihood_scans( only_parameters=None, parameters_per_page=1, sort_max=False, + show_diff=False, + labels=None, scan_points=101, x_min=-2., x_max=2, @@ -827,7 +829,7 @@ def plot_nuisance_likelihood_scans( campaign=None, paper=False, ): - """ + r""" Creates a plot showing the change of the negative log-likelihood, obtained *poi*, when 
varying values of nuisance paramaters and saves it at *paths*. The calculation of the likelihood change requires the RooFit *workspace* to read the model config, a RooDataSet *dataset* to construct @@ -838,10 +840,21 @@ def plot_nuisance_likelihood_scans( Nuisances to skip, or to show exclusively can be configured via *skip_parameters* and *only_parameters*, respectively, which can be lists of patterns. *parameters_per_page* defines the number of parameter curves that are drawn in the same canvas page. When *sort_max* is - *True*, the parameter are sorted by their highest likelihood change in the full scan range. The - scan range and granularity is set via *scan_points*, *x_min* and *x_max*. *y_min* and *y_max* - define the range of the y-axis, which is plotted with a logarithmic scale when *y_log* is - *True*. *model_parameters* can be a dictionary of key-value pairs of model parameters. + *True*, the parameter are sorted by their highest likelihood change in the full scan range. + By default, the x-axis shows absolute variations of the nuisance parameters (in terms of the + prefit range). When *show_diff* is *True*, it shows differences with respect to the best fit + value instead. + + *labels* should be a dictionary or a json file containing a dictionary that maps nuisances names + to labels shown in the plot, a python file containing a function named "rename_nuisance", or a + function itself. When it is a dictionary and a key starts with "^" and ends with "$" it is + interpreted as a regular expression. Matched groups can be reused in the substituted name via + '\n' to reference the n-th group (following the common re.sub format). When it is a function, it + should accept the current nuisance label as a single argument and return the new value. + + The scan range and granularity is set via *scan_points*, *x_min* and *x_max*. *y_min* and + *y_max* define the range of the y-axis, which is plotted with a logarithmic scale when *y_log* + is *True*. 
*model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign* should refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When *paper* is *True*, certain plot configurations are adjusted for use in publications. @@ -897,8 +910,9 @@ def plot_nuisance_likelihood_scans( x_values, y_values = [], [] print("scanning parameter {}".format(name)) for x in scan_values: - param.setVal(param_bf + (pre_u if x >= 0 else -pre_d) * x) - x_values.append(param.getVal()) + x_diff = x * (pre_u if x >= 0 else -pre_d) + param.setVal(param_bf + x_diff) + x_values.append(x_diff if show_diff else (param_bf + x_diff)) y_values.append(2 * (nll.getVal() - nll_base)) curve_data[name] = (x_values, y_values) @@ -918,6 +932,9 @@ def plot_nuisance_likelihood_scans( else: param_groups.append([name]) + # prepare labels + labels = make_parameter_label_map(param_names, labels) + # go through nuisances canvas = None for names in param_groups: @@ -941,7 +958,10 @@ def plot_nuisance_likelihood_scans( y_max, log=y_log) # dummy histogram to control axes - x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}" + if show_diff: + x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}" + else: + x_title = "#theta / #Delta#theta_{pre}" y_title = "Change in -2 log(L)" h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max) r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": _y_min, "Maximum": _y_max}) @@ -967,7 +987,8 @@ def plot_nuisance_likelihood_scans( g_nll = create_tgraph(len(x), x, y) r.setup_graph(g_nll, props={"LineWidth": 2, "LineStyle": 1}, color=colors[col]) draw_objs.append((g_nll, "SAME,C")) - legend_entries.append((g_nll, to_root_latex(name), "L")) + label = to_root_latex(labels.get(name, name)) + legend_entries.append((g_nll, label, "L")) # legend legend_cols = min(int(math.ceil(len(legend_entries) / 4.)), 3) diff --git a/dhi/plots/pulls_impacts.py b/dhi/plots/pulls_impacts.py index 32312d11..9afaaf9c 
100644 --- a/dhi/plots/pulls_impacts.py +++ b/dhi/plots/pulls_impacts.py @@ -6,7 +6,6 @@ Pull and impact plots using ROOT. import os import json -import re import math import array @@ -15,9 +14,9 @@ import numpy as np from dhi.config import poi_data, campaign_labels, colors, cms_postfix from dhi.util import ( - import_ROOT, import_file, multi_match, to_root_latex, linspace, colored, make_list, + import_ROOT, multi_match, to_root_latex, linspace, colored, make_list, ) -from dhi.plots.util import use_style +from dhi.plots.util import use_style, make_parameter_label_map colors = colors.root @@ -143,42 +142,7 @@ def plot_pulls_impacts( params = [params[i] for i in indices] # prepare labels - if isinstance(labels, six.string_types): - labels = os.path.expandvars(os.path.expanduser(labels)) - # try to load a renaming function called "rename_nuisance" - if labels.endswith(".py"): - labels = import_file(labels, attr="rename_nuisance") - if not callable(labels): - raise Exception("rename_nuisance loaded from {} is not callable".format(labels)) - else: - with open(labels, "r") as f: - labels = json.load(f) - elif not labels: - labels = {} - - if not isinstance(labels, dict): - # labels is a renaming function, call it for all parameters and store the result when - # names changed - _labels = {} - for param in params: - new_name = labels(param.name) - if new_name and new_name != param.name: - _labels[param.name] = new_name - labels = _labels - else: - # expand regular expressions through eager interpolation using parameter names - for k, v in labels.items(): - if not k.startswith("^") or not k.endswith("$"): - continue - for param in params: - # skip explicit translations, effectively giving them priority - if param.name in labels: - continue - # apply the pattern - new_name = re.sub(k, v, param.name) - # store a translation label when the name has changed - if new_name != param.name: - labels[param.name] = new_name + labels = make_parameter_label_map([param.name for param in 
params], labels) # determine the number of pages if parameters_per_page < 1: diff --git a/dhi/plots/util.py b/dhi/plots/util.py index 9f26d3e0..c9526c56 100644 --- a/dhi/plots/util.py +++ b/dhi/plots/util.py @@ -4,9 +4,12 @@ Different helpers and ROOT style configurations to be used with plotlib. """ +import os +import re import math import array import uuid +import json import functools import itertools import contextlib @@ -17,7 +20,7 @@ import numpy as np import scipy.interpolate from dhi.config import poi_data, br_hh_names -from dhi.util import import_ROOT, try_int, to_root_latex, make_list +from dhi.util import import_ROOT, import_file, try_int, to_root_latex, make_list _styles = {} @@ -131,6 +134,48 @@ def determine_limit_digits(limit, is_xsec=False): return 0 +def make_parameter_label_map(parameter_names, labels=None): + # prepare labels + if isinstance(labels, six.string_types): + labels = os.path.expandvars(os.path.expanduser(labels)) + # try to load a renaming function called "rename_nuisance" + if labels.endswith(".py"): + labels = import_file(labels, attr="rename_nuisance") + if not callable(labels): + raise Exception("rename_nuisance loaded from {} is not callable".format(labels)) + else: + with open(labels, "r") as f: + labels = json.load(f) + elif not labels: + labels = {} + + if not isinstance(labels, dict): + # labels is a renaming function, call it for all parameters and store the result when + # names changed + _labels = {} + for name in parameter_names: + new_name = labels(name) + if new_name: + _labels[name] = new_name + labels = _labels + else: + # expand regular expressions through eager interpolation using parameter names + for k, v in labels.items(): + if not k.startswith("^") or not k.endswith("$"): + continue + for name in parameter_names: + # skip explicit translations, effectively giving them priority + if name in labels: + continue + # apply the pattern + new_name = re.sub(k, v, name) + # store a translation label when set + if 
new_name: + labels[name] = new_name + + return labels + + def get_y_range(y_min_value, y_max_value, y_min=None, y_max=None, log=False, y_min_log=1e-3, top_margin=0.38, visible_margin=0.4): if log: diff --git a/dhi/tasks/postfit.py b/dhi/tasks/postfit.py index 5c58ac22..f7a4f9b6 100644 --- a/dhi/tasks/postfit.py +++ b/dhi/tasks/postfit.py @@ -13,6 +13,7 @@ import luigi from dhi.tasks.base import HTCondorWorkflow, view_output_plots from dhi.tasks.combine import CombineCommandTask, POITask, POIPlotTask, CreateWorkspace from dhi.tasks.snapshot import Snapshot, SnapshotUser +from dhi.tasks.pulls_impacts import PlotPullsAndImpacts class SAVEFLAGS(str, enum.Enum): @@ -313,6 +314,12 @@ class PlotNuisanceLikelihoodScans(PostfitPlotBase): description="when True, sort parameters by their hightest likelihood change in the scan " "range; mostly useful when the number of parameters per page is > 1; default: False", ) + show_diff = luigi.BoolParameter( + default=False, + description="when True, the x-axis shows differences of nuisance parameters with respect " + "to the best fit value instead of absolute values; default: False", + ) + labels = PlotPullsAndImpacts.labels mc_stats_patterns = ["*prop_bin*"] @@ -323,10 +330,17 @@ class PlotNuisanceLikelihoodScans(PostfitPlotBase): force_n_pois = 1 def requires(self): - return FitDiagnostics.req(self, skip_save=("WithUncertainties",)) + # normally, we would require FitDiagnostics without saved uncertainties no matter what, + # but since it could be already complete, use it when it does exist + full_fd = FitDiagnostics.req(self) + if full_fd.complete(): + return full_fd + return FitDiagnostics.req(self, skip_save=("WithUncertainties",), _prefer_cli=["skip_save"]) def output(self): parts = ["nlls", "{}To{}".format(self.x_min, self.x_max), self.get_output_postfix()] + if self.show_diff: + parts.append("diffs") if self.y_log: parts.append("log") if self.sort_max: @@ -375,6 +389,8 @@ class PlotNuisanceLikelihoodScans(PostfitPlotBase): 
skip_parameters=skip_parameters, parameters_per_page=self.parameters_per_page, sort_max=self.sort_max, + show_diff=self.show_diff, + labels=None if self.labels == law.NO_STR else self.labels, x_min=self.x_min, x_max=self.x_max, y_min=self.get_axis_limit("y_min"), -- GitLab From c839202977f9f913a8df463aab12df012b4b72fc Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Mon, 6 Sep 2021 10:47:18 +0200 Subject: [PATCH 17/21] Improve nuisance likelihood plotting. --- dhi/plots/likelihoods.py | 30 ++++++++++++++++-------------- dhi/plots/util.py | 30 +++++++++++++++--------------- modules/law | 2 +- setup.sh | 2 +- 4 files changed, 33 insertions(+), 31 deletions(-) diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index 638f442d..33a7a6a0 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -49,10 +49,10 @@ def plot_likelihood_scan_1d( paper=False, ): """ - Creates a likelihood plot of the 1D scan of a *poi* and saves it at *paths*. *values* should be a - mapping to lists of values or a record array with keys "<poi_name>" and "dnll2". *theory_value* - can be a 3-tuple denoting the nominal theory prediction of the POI and its up and down - uncertainties which is drawn as a vertical bar. + Creates a likelihood plot of the 1D scan of a *poi* and saves it at *paths*. *values* should be + a mapping to lists of values or a record array with keys "<poi_name>" and "dnll2". + *theory_value* can be a 3-tuple denoting the nominal theory prediction of the POI and its up and + down uncertainties which is drawn as a vertical bar. When *poi_min* is set, it should be the value of the poi that leads to the best likelihood. Otherwise, it is estimated from the interpolated curve. 
When *show_best_fit* @@ -819,7 +819,7 @@ def plot_nuisance_likelihood_scans( sort_max=False, show_diff=False, labels=None, - scan_points=101, + scan_points=401, x_min=-2., x_max=2, y_min=None, @@ -830,12 +830,12 @@ def plot_nuisance_likelihood_scans( paper=False, ): r""" - Creates a plot showing the change of the negative log-likelihood, obtained *poi*, when varying - values of nuisance paramaters and saves it at *paths*. The calculation of the likelihood change - requires the RooFit *workspace* to read the model config, a RooDataSet *dataset* to construct - the functional likelihood, and the output file *fit_diagnostics_path* of the combine fit - diagnostics for reading pre- and post-fit parameters for the fit named *fit_name*, defaulting - to ``"fit_s"``. + Creates a plot showing the change of the negative log-likelihood, previously obtained for a + *poi*, when varying values of nuisance paramaters and saves it at *paths*. The calculation of + the likelihood change requires the RooFit *workspace* to read the model config, a RooDataSet + *dataset* to construct the functional likelihood, and the output file *fit_diagnostics_path* of + the combine fit diagnostics for reading pre- and post-fit parameters for the fit named + *fit_name*, defaulting to ``"fit_s"``. Nuisances to skip, or to show exclusively can be configured via *skip_parameters* and *only_parameters*, respectively, which can be lists of patterns. 
*parameters_per_page* defines @@ -892,8 +892,10 @@ def plot_nuisance_likelihood_scans( param_names.append(param_name) print("preparing scans of {} parameter(s)".format(len(param_names))) - # prepare the scan values, ensure that 0 is contained - scan_values = np.linspace(x_min, x_max, scan_points).tolist() + # prepare the scan values, extend the range by 10 points in each directon, ensure 0 is contained + assert scan_points > 1 + width = float(x_max - x_min) / (scan_points - 1) + scan_values = np.linspace(x_min - 10 * width, x_max + 10 * width, scan_points + 20).tolist() if 0 not in scan_values: scan_values = sorted(scan_values + [0.]) @@ -962,7 +964,7 @@ def plot_nuisance_likelihood_scans( x_title = "(#theta - #theta_{best}) / #Delta#theta_{pre}" else: x_title = "#theta / #Delta#theta_{pre}" - y_title = "Change in -2 log(L)" + y_title = "-2 #Delta log(L)" h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max) r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": _y_min, "Maximum": _y_max}) draw_objs = [(h_dummy, "HIST")] diff --git a/dhi/plots/util.py b/dhi/plots/util.py index c9526c56..133c2081 100644 --- a/dhi/plots/util.py +++ b/dhi/plots/util.py @@ -433,6 +433,21 @@ def _get_contour(hist, level): return contours +def get_contour_box(graphs): + assert graphs + + x_values, y_values = [], [] + for g in graphs: + x, y = get_graph_points(g, errors=False) + x_values.extend(x) + y_values.extend(y) + + if not x_values or not y_values: + return None, None, None, None + + return min(x_values), max(x_values), min(y_values), max(y_values) + + def get_graph_points(g, errors=False): ROOT = import_ROOT() @@ -470,21 +485,6 @@ def get_graph_points(g, errors=False): return x_values, y_values -def get_contour_box(graphs): - assert graphs - - x_values, y_values = [], [] - for g in graphs: - x, y = get_graph_points(g, errors=False) - x_values.extend(x) - y_values.extend(y) - - if not x_values or not y_values: - return None, None, None, None - - 
return min(x_values), max(x_values), min(y_values), max(y_values) - - def repeat_graph(g, n): points = sum((n * [p] for p in zip(*get_graph_points(g))), []) diff --git a/modules/law b/modules/law index 8298b62f..250aab4d 160000 --- a/modules/law +++ b/modules/law @@ -1 +1 @@ -Subproject commit 8298b62f07a3ab177e0f95a018d33b32f68453ee +Subproject commit 250aab4d27217fa713e899cfcd80256fafcf91ec diff --git a/setup.sh b/setup.sh index 69b61eb7..6d133caf 100644 --- a/setup.sh +++ b/setup.sh @@ -367,7 +367,7 @@ interactive_setup() { # prepare the tmp env file if ! $setup_is_default && ! $env_file_exists; then - rm -rf "$env_file_tmp" + rm -f "$env_file_tmp" mkdir -p "$( dirname "$env_file_tmp" )" echo -e "\nStart querying variables for setup '\x1b[0;49;35m$setup_name\x1b[0m'" -- GitLab From d1842c85c23608e911217af8db5a30e02950fefc Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Tue, 7 Sep 2021 10:22:49 +0200 Subject: [PATCH 18/21] Update docs for new snapshots feature. --- .../snippets/eftupperlimits_param_tab.md | 7 -- .../snippets/fitdiagnostics_param_tab.md | 2 +- .../snippets/goodnessoffit_param_tab.md | 2 +- .../snippets/likelihoodscan_param_tab.md | 2 +- .../mergeeftbenchmarklimits_param_tab.md | 2 +- .../snippets/mergeeftupperlimits_param_tab.md | 7 -- .../snippets/mergegoodnessoffit_param_tab.md | 2 +- .../snippets/mergelikelihoodscan_param_tab.md | 2 +- .../mergepullsandimpacts_param_tab.md | 2 +- .../mergesignificancescan_param_tab.md | 2 +- .../snippets/mergeupperlimits_param_tab.md | 2 +- docs/content/snippets/parameters.md | 2 +- .../snippets/ploteftupperlimits_param_tab.md | 7 -- .../plotexclusionandbestfit2d_param_tab.md | 2 +- .../plotexclusionandbestfit_param_tab.md | 2 +- .../snippets/plotgoodnessoffit_param_tab.md | 2 +- .../snippets/plotlikelihoodscan_param_tab.md | 2 +- .../plotmultiplegoodnessoffits_param_tab.md | 2 +- .../plotmultiplelikelihoodscans_param_tab.md | 2 +- ...ultiplelikelihoodscansbymodel_param_tab.md | 2 +- 
...plotmultiplesignificancescans_param_tab.md | 2 +- .../plotmultipleupperlimits_param_tab.md | 2 +- .../plotnuisancelikelihoodscans_param_tab.md | 2 +- .../snippets/plotpostfitsoverb_param_tab.md | 2 +- .../snippets/plotpullsandimpacts_param_tab.md | 2 +- .../plotsignificancescan_param_tab.md | 2 +- .../snippets/plotupperlimits_param_tab.md | 2 +- .../plotupperlimitsatpoint_param_tab.md | 2 +- .../snippets/pullsandimpacts_param_tab.md | 2 +- .../snippets/significancescan_param_tab.md | 2 +- docs/content/snippets/snapshot_param_tab.md | 8 ++ .../content/snippets/upperlimits_param_tab.md | 2 +- docs/content/tasks/snapshot.md | 86 +++++++++++++++++++ docs/mkdocs.yml | 3 +- 34 files changed, 124 insertions(+), 50 deletions(-) delete mode 100644 docs/content/snippets/eftupperlimits_param_tab.md delete mode 100644 docs/content/snippets/mergeeftupperlimits_param_tab.md delete mode 100644 docs/content/snippets/ploteftupperlimits_param_tab.md create mode 100644 docs/content/snippets/snapshot_param_tab.md create mode 100644 docs/content/tasks/snapshot.md diff --git a/docs/content/snippets/eftupperlimits_param_tab.md b/docs/content/snippets/eftupperlimits_param_tab.md deleted file mode 100644 index 22a68686..00000000 --- a/docs/content/snippets/eftupperlimits_param_tab.md +++ /dev/null @@ -1,7 +0,0 @@ -The `EFTBenchmarkLimits` computes the limits of each benchmark datacard. 
- -<div class="dhi_parameter_table"> - ---8<-- "content/snippets/parameters.md@-2,21,61,48,34,17,18,47,60" - -</div> diff --git a/docs/content/snippets/fitdiagnostics_param_tab.md b/docs/content/snippets/fitdiagnostics_param_tab.md index cb856a44..1fe1d1e9 100644 --- a/docs/content/snippets/fitdiagnostics_param_tab.md +++ b/docs/content/snippets/fitdiagnostics_param_tab.md @@ -2,6 +2,6 @@ The `PostFitShapes` task takes the workspace created by `CreateWorkspace` and ru <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,47,34,57,58" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,47,34,57,58" </div> diff --git a/docs/content/snippets/goodnessoffit_param_tab.md b/docs/content/snippets/goodnessoffit_param_tab.md index 38b5c3a2..c6a5ef16 100644 --- a/docs/content/snippets/goodnessoffit_param_tab.md +++ b/docs/content/snippets/goodnessoffit_param_tab.md @@ -2,6 +2,6 @@ The `GoodnessOfFit` task performs the tests for either data (`--branch 0`) and m <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53,34,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53,34,13,36" </div> diff --git a/docs/content/snippets/likelihoodscan_param_tab.md b/docs/content/snippets/likelihoodscan_param_tab.md index eeecedac..2ce7ee59 100644 --- a/docs/content/snippets/likelihoodscan_param_tab.md +++ b/docs/content/snippets/likelihoodscan_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,47,34,35,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74,47,34,35,13,36" </div> diff --git a/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md b/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md index 66d438d3..17db11a8 100644 --- 
a/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md +++ b/docs/content/snippets/mergeeftbenchmarklimits_param_tab.md @@ -1,4 +1,4 @@ -The `MergeEFTUpperLimits` task collects the limits from each of the `EFTUpperLimits` tasks and merges them into a single file. +The `MergeEFTBenchmarkLimits` task collects the limits from each of the `EFTBenchmarkLimits` tasks and merges them into a single file. <div class="dhi_parameter_table"> diff --git a/docs/content/snippets/mergeeftupperlimits_param_tab.md b/docs/content/snippets/mergeeftupperlimits_param_tab.md deleted file mode 100644 index 2c3761ff..00000000 --- a/docs/content/snippets/mergeeftupperlimits_param_tab.md +++ /dev/null @@ -1,7 +0,0 @@ -The `MergeEFTUpperLimits` task collects the limits from each of the `EFTUpperLimits` tasks and merges them into a single file. - -<div class="dhi_parameter_table"> - ---8<-- "content/snippets/parameters.md@-2,21,61,48,34,17,18,47,60" - -</div> diff --git a/docs/content/snippets/mergegoodnessoffit_param_tab.md b/docs/content/snippets/mergegoodnessoffit_param_tab.md index 545796ce..c2b7c6e6 100644 --- a/docs/content/snippets/mergegoodnessoffit_param_tab.md +++ b/docs/content/snippets/mergegoodnessoffit_param_tab.md @@ -2,6 +2,6 @@ The `MergeGoodnessOfFit` task collects the test statistic results from each of t <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53" </div> diff --git a/docs/content/snippets/mergelikelihoodscan_param_tab.md b/docs/content/snippets/mergelikelihoodscan_param_tab.md index 502a425f..59c25ec6 100644 --- a/docs/content/snippets/mergelikelihoodscan_param_tab.md +++ b/docs/content/snippets/mergelikelihoodscan_param_tab.md @@ -2,6 +2,6 @@ The `MergeLikelihoodScan` task collects the fit results from each of the `Likeli <div class="dhi_parameter_table"> ---8<-- 
"content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74" </div> diff --git a/docs/content/snippets/mergepullsandimpacts_param_tab.md b/docs/content/snippets/mergepullsandimpacts_param_tab.md index 669ab647..f28c45c5 100644 --- a/docs/content/snippets/mergepullsandimpacts_param_tab.md +++ b/docs/content/snippets/mergepullsandimpacts_param_tab.md @@ -2,6 +2,6 @@ The `MergePullsAndImpacts` task collects the fit results from the `PullsAndImpac <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,56,28,33,67" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,74,56,28,33,67" </div> diff --git a/docs/content/snippets/mergesignificancescan_param_tab.md b/docs/content/snippets/mergesignificancescan_param_tab.md index 53e00a3f..e25d34d7 100644 --- a/docs/content/snippets/mergesignificancescan_param_tab.md +++ b/docs/content/snippets/mergesignificancescan_param_tab.md @@ -2,6 +2,6 @@ The `MergeSignificanceScan` task collects the fit results from each of the `Sign <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,53,48,17,18" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,53,48,17,18,74" </div> diff --git a/docs/content/snippets/mergeupperlimits_param_tab.md b/docs/content/snippets/mergeupperlimits_param_tab.md index 161709d2..75b11805 100644 --- a/docs/content/snippets/mergeupperlimits_param_tab.md +++ b/docs/content/snippets/mergeupperlimits_param_tab.md @@ -2,6 +2,6 @@ The `MergeUpperLimits` task collects the fit results from each of the `UpperLimi <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74" </div> diff --git a/docs/content/snippets/parameters.md b/docs/content/snippets/parameters.md index 91a75567..18cb41b6 100644 --- 
a/docs/content/snippets/parameters.md +++ b/docs/content/snippets/parameters.md @@ -71,4 +71,4 @@ | `--prefit` | Draw prefit histograms and uncertainties instead of postfit ones. Defaults to `False`. | | `--show-best-fit-error` | When `True`, the uncertainty on the POIs best fit value is shown as well. Defaults to `True`. | | `--recompute-best-fit` | When `True`, the best fit value is recomputed from `scipy.interpolate` and `scipy.minimize`. Defaults to `False`. | -| `--use-snapshot` | When `True`, uses the initial fit as snapshot for nuisance fits. Recommended when running unblinded. Defaults to `False`. | +| `--use-snapshot` | When `True`, performs a [MultiDimFit snapshot](snapshot.md) first and uses it instead of the initial workspace. When running unblinded, consider using this option. Defaults to `False`. | diff --git a/docs/content/snippets/ploteftupperlimits_param_tab.md b/docs/content/snippets/ploteftupperlimits_param_tab.md deleted file mode 100644 index bb32a49f..00000000 --- a/docs/content/snippets/ploteftupperlimits_param_tab.md +++ /dev/null @@ -1,7 +0,0 @@ -The `PlotEFTUpperLimits` task collects the results from the `MergeEFTUpperLimits` task and visualizes them in a plot. 
- -<div class="dhi_parameter_table"> - ---8<-- "content/snippets/parameters.md@-2,21,61,48,17,18,10,11,3,4,7,8,9,60" - -</div> diff --git a/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md b/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md index cdf6e57b..ed848fae 100644 --- a/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md +++ b/docs/content/snippets/plotexclusionandbestfit2d_param_tab.md @@ -2,6 +2,6 @@ The `PlotExclusionAndBestFit2D` task collects the fit results from the `MergeUpp <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,55,72,73,48,17,18,12,39,40,11,3,4,5,6,7,8" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,55,72,73,48,17,18,74,12,39,40,11,3,4,5,6,7,8" </div> diff --git a/docs/content/snippets/plotexclusionandbestfit_param_tab.md b/docs/content/snippets/plotexclusionandbestfit_param_tab.md index 556277f8..d666df3a 100644 --- a/docs/content/snippets/plotexclusionandbestfit_param_tab.md +++ b/docs/content/snippets/plotexclusionandbestfit_param_tab.md @@ -2,6 +2,6 @@ The `PlotExclusionAndBestFit` task collects the fit results from the `MergeUpper <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,55,72,73,48,17,18,3,4,22,23,12,5,6" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,55,72,73,48,17,18,74,3,4,22,23,12,5,6" </div> diff --git a/docs/content/snippets/plotgoodnessoffit_param_tab.md b/docs/content/snippets/plotgoodnessoffit_param_tab.md index 64fbb27b..9cc94e54 100644 --- a/docs/content/snippets/plotgoodnessoffit_param_tab.md +++ b/docs/content/snippets/plotgoodnessoffit_param_tab.md @@ -2,6 +2,6 @@ The `PlotGoodnessOfFit` task collects the results from `MergeGoodnessOfFit` and <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53,3,4,5,6,7,8" +--8<-- 
"content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53,3,4,5,6,7,8" </div> diff --git a/docs/content/snippets/plotlikelihoodscan_param_tab.md b/docs/content/snippets/plotlikelihoodscan_param_tab.md index e2d4f51b..afaec455 100644 --- a/docs/content/snippets/plotlikelihoodscan_param_tab.md +++ b/docs/content/snippets/plotlikelihoodscan_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate the visualisation. <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,3,4,63,55,72,73,62,5,6,7,8,9,24,25" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,3,4,63,55,72,73,62,5,6,7,8,9,24,25" </div> diff --git a/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md b/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md index 95ede59d..1d5c4b25 100644 --- a/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md +++ b/docs/content/snippets/plotmultiplegoodnessoffits_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleGoodnessOfFits` task collects results from multiple `MergeGoodn <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,51,49,50,53,3,4,22,23,5,6,7,8" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,17,18,74,51,49,50,53,3,4,22,23,5,6,7,8" </div> diff --git a/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md b/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md index 3a127048..f8c9e623 100644 --- a/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md +++ b/docs/content/snippets/plotmultiplelikelihoodscans_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleLikelihoodScans` task collects the fit results from multiple `M <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,3,4,22,23,55,73,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,48,17,18,74,3,4,22,23,55,73,5,6,7,8,9" 
</div> diff --git a/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md b/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md index 05dba9c7..40d53fff 100644 --- a/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md +++ b/docs/content/snippets/plotmultiplelikelihoodscansbymodel_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleLikelihoodScansByModel` task collects the fit results from mult <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,3,4,45,46,55,73,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,74,3,4,45,46,55,73,5,6,7,8,9" </div> diff --git a/docs/content/snippets/plotmultiplesignificancescans_param_tab.md b/docs/content/snippets/plotmultiplesignificancescans_param_tab.md index be53d3f4..fa06cde0 100644 --- a/docs/content/snippets/plotmultiplesignificancescans_param_tab.md +++ b/docs/content/snippets/plotmultiplesignificancescans_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleSignificanceScans` task collects the fit results from multiple <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,53,48,17,18,3,4,64,22,23,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,53,48,17,18,74,3,4,64,22,23,5,6,7,8,9" </div> diff --git a/docs/content/snippets/plotmultipleupperlimits_param_tab.md b/docs/content/snippets/plotmultipleupperlimits_param_tab.md index 9f98ccd8..26c26221 100644 --- a/docs/content/snippets/plotmultipleupperlimits_param_tab.md +++ b/docs/content/snippets/plotmultipleupperlimits_param_tab.md @@ -2,6 +2,6 @@ The `PlotMultipleUpperLimits` task collects the fit results from multiple `Merge <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,10,11,3,4,22,23,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,21,19,16,54,14,66,12,17,18,74,10,11,3,4,22,23,5,6,7,8,9" </div> diff --git 
a/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md b/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md index 79e6e0ec..ac113298 100644 --- a/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md +++ b/docs/content/snippets/plotnuisancelikelihoodscans_param_tab.md @@ -2,6 +2,6 @@ The `PlotNuisanceLikelihoodScans` task collects results from the `FitDiagnostics <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,48,14,66,12,17,18,56,28,27,59,3,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,48,14,66,12,17,18,74,56,28,27,59,3,5,6,7,8,9" </div> diff --git a/docs/content/snippets/plotpostfitsoverb_param_tab.md b/docs/content/snippets/plotpostfitsoverb_param_tab.md index 13b5ea57..431abf3e 100644 --- a/docs/content/snippets/plotpostfitsoverb_param_tab.md +++ b/docs/content/snippets/plotpostfitsoverb_param_tab.md @@ -2,6 +2,6 @@ The `PlotPostfitSOverB` task reads the fit diagnostics result from the `PostFitS <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,12,48,17,18,43,65,3,4,71,69,44,70,6,7,41,42" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,12,48,17,18,74,43,65,3,4,71,69,44,70,6,7,41,42" </div> diff --git a/docs/content/snippets/plotpullsandimpacts_param_tab.md b/docs/content/snippets/plotpullsandimpacts_param_tab.md index 2ef2e2bb..49a81eeb 100644 --- a/docs/content/snippets/plotpullsandimpacts_param_tab.md +++ b/docs/content/snippets/plotpullsandimpacts_param_tab.md @@ -2,6 +2,6 @@ The `PlotPullsAndImpacts` task collects fit results from `MergePullsAndImpacts` <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,65,68,56,28,29,33,27,52,30,31,32,3,4,67" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,65,68,56,28,29,33,27,52,30,31,32,3,4,67" </div> diff --git a/docs/content/snippets/plotsignificancescan_param_tab.md 
b/docs/content/snippets/plotsignificancescan_param_tab.md index 79f8dbae..aafb94d5 100644 --- a/docs/content/snippets/plotsignificancescan_param_tab.md +++ b/docs/content/snippets/plotsignificancescan_param_tab.md @@ -2,6 +2,6 @@ The `PlotSignificanceScan` task collects the fit results from `MergeSignificance <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,53,48,17,18,3,4,64,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,53,48,17,18,74,3,4,64,5,6,7,8,9" </div> diff --git a/docs/content/snippets/plotupperlimits_param_tab.md b/docs/content/snippets/plotupperlimits_param_tab.md index c00580f2..88e7fda1 100644 --- a/docs/content/snippets/plotupperlimits_param_tab.md +++ b/docs/content/snippets/plotupperlimits_param_tab.md @@ -3,6 +3,6 @@ It provides some handy CLI parameters to manipulate the visualisation. <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,10,11,3,4,5,6,7,8,9" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,12,48,17,18,74,10,11,3,4,5,6,7,8,9" </div> diff --git a/docs/content/snippets/plotupperlimitsatpoint_param_tab.md b/docs/content/snippets/plotupperlimitsatpoint_param_tab.md index 367bab9b..5d04148e 100644 --- a/docs/content/snippets/plotupperlimitsatpoint_param_tab.md +++ b/docs/content/snippets/plotupperlimitsatpoint_param_tab.md @@ -2,6 +2,6 @@ The `PlotUpperLimitsAtPoint` task collects a single fit result from each `UpperL <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,21,19,16,14,66,12,48,17,18,10,11,3,4,22,23,37,5,6,38" +--8<-- "content/snippets/parameters.md@-2,21,19,16,14,66,12,48,17,18,74,10,11,3,4,22,23,37,5,6,38" </div> diff --git a/docs/content/snippets/pullsandimpacts_param_tab.md b/docs/content/snippets/pullsandimpacts_param_tab.md index aed97a9d..ef203b08 100644 --- a/docs/content/snippets/pullsandimpacts_param_tab.md +++ 
b/docs/content/snippets/pullsandimpacts_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,56,28,33,13,34,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,56,28,33,13,34,36" </div> diff --git a/docs/content/snippets/significancescan_param_tab.md b/docs/content/snippets/significancescan_param_tab.md index 8ae51a56..8dd4c507 100644 --- a/docs/content/snippets/significancescan_param_tab.md +++ b/docs/content/snippets/significancescan_param_tab.md @@ -2,6 +2,6 @@ The `SignificanceScan` runs the fits for each point in the defined scan paramete <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,48,17,18,47,53,34,35,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,54,14,66,48,17,18,74,47,53,34,35,13,36" </div> diff --git a/docs/content/snippets/snapshot_param_tab.md b/docs/content/snippets/snapshot_param_tab.md new file mode 100644 index 00000000..9cb13a61 --- /dev/null +++ b/docs/content/snippets/snapshot_param_tab.md @@ -0,0 +1,8 @@ +The `Snapshot` task runs a single fit for the given POI and parameter values, and saves the workspace with all parameters set to their best fit values following a standard `MultiDimFit`. +It provides some handy cli parameters to manipulate POIs, ranges and other options. 
+ +<div class="dhi_parameter_table"> + +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,47,34,35,13,36" + +</div> diff --git a/docs/content/snippets/upperlimits_param_tab.md b/docs/content/snippets/upperlimits_param_tab.md index d6c78872..8d6eedfc 100644 --- a/docs/content/snippets/upperlimits_param_tab.md +++ b/docs/content/snippets/upperlimits_param_tab.md @@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio <div class="dhi_parameter_table"> ---8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,47,34,35,13,36" +--8<-- "content/snippets/parameters.md@-2,20,19,16,15,14,66,48,17,18,74,47,34,35,13,36" </div> diff --git a/docs/content/tasks/snapshot.md b/docs/content/tasks/snapshot.md new file mode 100644 index 00000000..5505a0d6 --- /dev/null +++ b/docs/content/tasks/snapshot.md @@ -0,0 +1,86 @@ +The `Snapshot` task is essentially performing a `MultiDimFit` and saves a copy of the initial workspace with all parameters set to their best fit values. +This updated workspace can be used in all subsequent `combine` commands just like the normal workspace, with the benefit that parameters are already at sensible values before the fit which can lead to faster and potentially more stable convergence. +See the [combine documentation](http://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part3/runningthetool/#loading-snapshots) for more info on snapshots. + +Almost all tasks within the inference tools are able to use a workspace snapshot instead of the normal workspace by adding ==`--use-snapshot`== to the `law run ...` command to run. +By doing so, the requirement structure is slightly changed in order to first run the snapshot for the selected datacards, HH physics model, parameter options, blinded / unblinded flags, etc. 
+ +As an example, the workflow triggered by `PlotLikelihoodScan` with the additional `--use-snapshot` option will look like this: + +```mermaid + graph LR; + A(PlotLikelihoodScan) --> B(MergeLikelihoodScan); + B --> C([LikelihoodScan]); + C --> D([Snapshot]); + D --> E(CreateWorkspace); + E --> F(CombineDatacards); +``` + +Note the additional `Snapshot` task between `LikelihoodScan` and `CreateWorkspace`. +In any other regard, the `Snapshot` behaves like the `LikelihoodScan` task, i.e., it supports the same options and it is able to submit jobs to HTCondor (as indicated by the round boxes). + +- [Quick example](#quick-example) +- [Dependencies](#dependencies) +- [Parameters](#parameters) +- [Example commands](#example-commands) + + +#### Quick example + +```shell +law run Snapshot \ + --version dev \ + --datacards $DHI_EXAMPLE_CARDS \ + --pois r +``` + +#### Dependencies + +```mermaid + graph LR; + A([Snapshot]) --> B(CreateWorkspace); + B --> C(CombineDatacards); +``` + +Rounded boxes mark [workflows](practices.md#workflows) with the option to run tasks as HTCondor jobs. + + +#### Parameters + +=== "Snapshot" + + --8<-- "content/snippets/snapshot_param_tab.md" + +=== "CreateWorkspace" + + --8<-- "content/snippets/createworkspace_param_tab.md" + +=== "CombineDatacards" + + --8<-- "content/snippets/combinedatacards_param_tab.md" + + +#### Example commands + +**1.** Blinded snapshot on POI `kl` with `kt` set to 1.5. + +```shell hl_lines="4-5" +law run Snapshot \ + --version dev \ + --datacards $DHI_EXAMPLE_CARDS \ + --pois kl \ + --parameter-values kt=1.5 +``` + + +**2.** Same as example command 1, but execute the snapshot task on HTCondor and run unblinded. 
+ +```shell hl_lines="6-7" +law run Snapshot \ + --version dev \ + --datacards $DHI_EXAMPLE_CARDS \ + --pois kl \ + --parameter-values kt=1.5 \ + --workflow htcondor \ + --unblinded +``` diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c264d2f1..df96bf77 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -71,7 +71,7 @@ markdown_extensions: check_paths: true extra_javascript: - - https://unpkg.com/mermaid@8.6/dist/mermaid.min.js + - https://unpkg.com/mermaid@8.12/dist/mermaid.min.js extra_css: - extra/styles.css @@ -94,6 +94,7 @@ nav: - Postfit plots: tasks/postfit.md - Goodness-of-fit tests: tasks/gof.md - EFT limits: tasks/eft.md + - Snapshots: tasks/snapshot.md - Best practices: tasks/practices.md - Datacard manipulation: datacard_manipulation.md - Useful scripts: scripts.md -- GitLab From 8ee65949a307f309f9a33dffb84c1e18baf077e7 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Tue, 7 Sep 2021 10:52:08 +0200 Subject: [PATCH 19/21] Remove constant zero point in all fits. 
--- dhi/tasks/combine.py | 7 +++++++ dhi/tasks/likelihoods.py | 1 - dhi/tasks/snapshot.py | 1 - 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dhi/tasks/combine.py b/dhi/tasks/combine.py index 319e4be0..8c04e5a3 100644 --- a/dhi/tasks/combine.py +++ b/dhi/tasks/combine.py @@ -1395,9 +1395,16 @@ class CombineCommandTask(CommandTask): @property def combine_optimization_args(self): + # start with minimizer args args = self.get_minimizer_args() + + # additional args + args += " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" + + # optimization for discrete parameters (as suggested by bbgg) if self.optimize_discretes: args += " " + self.combine_discrete_options + return args def get_command(self): diff --git a/dhi/tasks/likelihoods.py b/dhi/tasks/likelihoods.py index 147077b4..36d2aa0c 100644 --- a/dhi/tasks/likelihoods.py +++ b/dhi/tasks/likelihoods.py @@ -134,7 +134,6 @@ class LikelihoodScan(LikelihoodBase, CombineCommandTask, law.LocalWorkflow, HTCo " --freezeNuisanceGroups {self.joined_frozen_groups}" " {snapshot_args}" " --saveNLL" - " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}" diff --git a/dhi/tasks/snapshot.py b/dhi/tasks/snapshot.py index fcdd5a15..1f578c09 100644 --- a/dhi/tasks/snapshot.py +++ b/dhi/tasks/snapshot.py @@ -81,7 +81,6 @@ class Snapshot(POITask, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow) " --freezeNuisanceGroups {self.joined_frozen_groups}" " --saveWorkspace" " --saveNLL" - " --X-rtd REMOVE_CONSTANT_ZERO_POINT=1" " {self.combine_optimization_args}" " && " "mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}" -- GitLab From 0df19f47e3763d788887b403534ba9431044de7c Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Tue, 7 Sep 2021 12:01:27 +0200 Subject: [PATCH 20/21] Fix multi likelihood plots. 
--- README.md | 5 ++++- dhi/plots/likelihoods.py | 18 +++++++++--------- docs/content/introduction.md | 5 ++++- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 1976798c..eb877bce 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,9 @@ module 'dhi.tasks.base', 3 task(s): - BundleRepo - BundleSoftware +module 'dhi.tasks.snapshot', 1 task(s): + - Snapshot + module 'dhi.tasks.limits', 7 task(s): - UpperLimits - PlotUpperLimitsAtPoint @@ -154,7 +157,7 @@ module 'dhi.tasks.exclusion', 2 task(s): - PlotExclusionAndBestFit - PlotExclusionAndBestFit2D -written 44 task(s) to index file '/your/path/inference/.law/index' +written 45 task(s) to index file '/your/path/inference/.law/index' ``` You can type diff --git a/dhi/plots/likelihoods.py b/dhi/plots/likelihoods.py index 33a7a6a0..c525dad4 100644 --- a/dhi/plots/likelihoods.py +++ b/dhi/plots/likelihoods.py @@ -119,9 +119,9 @@ def plot_likelihood_scan_1d( r.setup_line(line, props={"LineColor": colors.black, "LineStyle": 2, "NDC": False}) draw_objs.append(line) - # lines at chi2_1 intervals + # horizontal lines at chi2_1 intervals for n in [chi2_levels[1][1], chi2_levels[1][2]]: - if n < y_max_line: + if y_min <= n <= y_max_line: line = ROOT.TLine(x_min, n, x_max, n) r.setup_line(line, props={"LineColor": colors.black, "LineStyle": 2, "NDC": False}) draw_objs.append(line) @@ -144,8 +144,8 @@ def plot_likelihood_scan_1d( if not has_thy_err: legend_entries.append((line_thy, "Theory prediction", "L")) - # line for best fit value - if show_best_fit and scan: + # vertical line for best fit value + if show_best_fit and scan and (x_min <= scan.poi_min <= x_max): line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line) r.setup_line(line_fit, props={"LineWidth": 2, "NDC": False}, color=colors.black) draw_objs.append(line_fit) @@ -255,7 +255,7 @@ def plot_likelihood_scans_1d( values = { k: np.array(v, dtype=np.float32) for k, v in values.items() - if k in [poi1, poi2, "dnll2"] + 
if k in [poi, "dnll2"] } # preprocess values (nan detection, negative shift) values["dnll2"], values[poi] = _preprocess_values(values["dnll2"], (poi, values[poi]), @@ -294,9 +294,9 @@ def plot_likelihood_scans_1d( r.setup_hist(h_dummy, pad=pad, props={"LineWidth": 0, "Minimum": y_min, "Maximum": y_max}) draw_objs.append((h_dummy, "HIST")) - # lines at chi2_1 intervals + # horizontal lines at chi2_1 intervals for n in [chi2_levels[1][1], chi2_levels[1][2]]: - if n < y_max: + if y_min <= n <= y_max_line: line = ROOT.TLine(x_min, n, x_max, n) r.setup_line(line, props={"LineColor": colors.black, "LineStyle": 2, "NDC": False}) draw_objs.append(line) @@ -319,8 +319,8 @@ def plot_likelihood_scans_1d( legend_entries.insert(0, (g_nll, to_root_latex(br_hh_names.get(d["name"], d["name"])), "LP" if show_points else "L")) - # line for best fit value - if show_best_fit and scan: + # vertical line for best fit value + if show_best_fit and scan and (x_min <= scan.poi_min <= x_max): line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line) r.setup_line(line_fit, props={"LineWidth": 2, "NDC": False}, color=colors[col]) draw_objs.append(line_fit) diff --git a/docs/content/introduction.md b/docs/content/introduction.md index b84fdaa1..18cacf94 100644 --- a/docs/content/introduction.md +++ b/docs/content/introduction.md @@ -95,6 +95,9 @@ module 'dhi.tasks.base', 3 task(s): - BundleRepo - BundleSoftware +module 'dhi.tasks.snapshot', 1 task(s): + - Snapshot + module 'dhi.tasks.limits', 7 task(s): - UpperLimits - PlotUpperLimitsAtPoint @@ -151,7 +154,7 @@ module 'dhi.tasks.exclusion', 2 task(s): - PlotExclusionAndBestFit - PlotExclusionAndBestFit2D -written 44 task(s) to index file '/your/path/inference/.law/index' +written 45 task(s) to index file '/your/path/inference/.law/index' ``` You can type -- GitLab From 1d9ef4e85ede3d276fb0274be8a461929ed44273 Mon Sep 17 00:00:00 2001 From: Marcel R <github.riga@icloud.com> Date: Tue, 7 Sep 2021 13:43:59 +0200 Subject: [PATCH 21/21] 
Remove comment in setup script. --- setup.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.sh b/setup.sh index 6d133caf..b052de18 100644 --- a/setup.sh +++ b/setup.sh @@ -77,9 +77,6 @@ setup() { [ "$DHI_REINSTALL_COMBINE" = "1" ] && rm -f "$flag_file_combine" if [ ! -f "$flag_file_combine" ]; then mkdir -p "$DHI_SOFTWARE" - # the combine setup below uses a custom branch which adds only a single commit on top of - # the recommended v8.2.0 tag; once https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/pull/686 - # is merged, we can transition back to the central repo if [ "$DHI_COMBINE_STANDALONE" != "True" ]; then # CMSSW based ( -- GitLab