Commit 0c3c5e89 authored by Marcel Rieger's avatar Marcel Rieger
Browse files

Allow CH-style impact calculation from Hessian.

parent e1bb8f19
Pipeline #3483674 passed with stage
in 41 seconds
......@@ -5,3 +5,4 @@
- `HBRscaler.py`: Initial implementation of the scaling of Higgs branching ratios and single Higgs backgrounds with kappa parameters. Fully included in `hh_model.py`.
- `HHModelPinv.py` (*deprecated*): Old HH physics model for development. To be removed.
- `HHModelPinv_C2klkt.py` (*deprecated*): Old HH physics model for development. To be removed.
- `HHModelPinvVHH.py` (*deprecated*): Old HH physics model for development. To be removed.
......@@ -1468,9 +1468,11 @@ def excluded_to_allowed_ranges(excluded_ranges, min_value, max_value):
taking into account the endpoints given by *min_value* and *max_value*. An open range is denoted
by a *None*. Thus, (*None*, *None*) would mean that the entire range is allowed.
"""
# shorthands
# shorthands and checks
e_ranges = excluded_ranges
a_ranges = []
min_value = float(min_value)
max_value = float(max_value)
# excluded_ranges might mark open ends with None's as well, so before linearizing values,
# replace them with the global endpoints
......@@ -1481,7 +1483,7 @@ def excluded_to_allowed_ranges(excluded_ranges, min_value, max_value):
e_ranges[-1] = (e_ranges[-1][0], max_value)
# linearize excluded ranges (assuming edges are always reasonably far apart)
e_edges = sum(map(list, excluded_ranges), [])
e_edges = map(float, sum(map(list, excluded_ranges), []))
# loop through adjacent pairs and create allowed ranges in an alternating fashion
for i, (start, stop) in enumerate(zip(e_edges[:-1], e_edges[1:])):
......
......@@ -248,7 +248,7 @@ class PlotUpperLimits(UpperLimitsBase, POIPlotTask):
outputs["plots"] = [self.local_target(name) for name in names]
if self.save_ranges:
outputs["ranges"] = self.local_target("ranges_{}.json".format(
outputs["ranges"] = self.local_target("ranges__{}.json".format(
self.get_output_postfix()))
return outputs
......@@ -398,7 +398,7 @@ class PlotMultipleUpperLimits(PlotUpperLimits, MultiDatacardTask):
outputs["plots"] = [self.local_target(name) for name in names]
if self.save_ranges:
outputs["ranges"] = self.local_target("ranges_{}.json".format(
outputs["ranges"] = self.local_target("ranges__{}.json".format(
self.get_output_postfix()))
return outputs
......@@ -533,7 +533,7 @@ class PlotMultipleUpperLimitsByModel(PlotUpperLimits, MultiHHModelTask):
outputs["plots"] = [self.local_target(name) for name in names]
if self.save_ranges:
outputs["ranges"] = self.local_target("ranges_{}.json".format(
outputs["ranges"] = self.local_target("ranges__{}.json".format(
self.get_output_postfix()))
return outputs
......
......@@ -18,6 +18,12 @@ from dhi.datacard_tools import get_workspace_parameters
class PullsAndImpactsBase(POITask, SnapshotUser):
method = luigi.ChoiceParameter(
choices=("default", "hesse"),
default="default",
description="the computation method; 'default' means no approximation; "
"choices: default,hesse,robust; default: default",
)
only_parameters = law.CSVParameter(
default=(),
description="comma-separated parameter names to include; supports patterns; skips all "
......@@ -41,51 +47,45 @@ class PullsAndImpactsBase(POITask, SnapshotUser):
def get_output_postfix(self, join=True):
parts = super(PullsAndImpactsBase, self).get_output_postfix(join=False)
if self.method != "default":
parts.append(self.method)
if self.use_snapshot:
parts.append("fromsnapshot")
return self.join_postfix(parts) if join else parts
def get_selected_parameters(self, workspace_parameters):
if not workspace_parameters:
return []
class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow):
# start with all except pois
params = [p for p in workspace_parameters if p != self.pois[0]]
run_command_in_tmp = True
# remove mc stats parameters if requested, otherwise move them to the end
is_mc_stats = lambda p: law.util.multi_match(p, self.mc_stats_patterns, mode=any)
if self.mc_stats:
sort_fn = lambda p: params.index(p) * (100000 if is_mc_stats(p) else 1)
params = sorted(params, key=sort_fn)
else:
params = [p for p in params if not is_mc_stats(p)]
def __init__(self, *args, **kwargs):
super(PullsAndImpacts, self).__init__(*args, **kwargs)
# filter and skip
if self.only_parameters:
params = [p for p in params if law.util.multi_match(p, self.only_parameters)]
if self.skip_parameters:
params = [p for p in params if not law.util.multi_match(p, self.skip_parameters)]
self._cache_branches = False
return params
def create_branch_map(self):
# the first branch (index 0) is the nominal fit
branches = ["nominal"]
# read the nuisance parameters from the workspace file when present
params = self.workspace_parameters
if params:
# remove poi
params = [p for p in params if p != self.pois[0]]
# remove mc stats parameters if requested, otherwise move them to the end
is_mc_stats = lambda p: law.util.multi_match(p, self.mc_stats_patterns, mode=any)
if self.mc_stats:
sort_fn = lambda p: params.index(p) * (100000 if is_mc_stats(p) else 1)
params = sorted(params, key=sort_fn)
else:
params = [p for p in params if not is_mc_stats(p)]
# skip
if self.only_parameters:
params = [p for p in params if law.util.multi_match(p, self.only_parameters)]
if self.skip_parameters:
params = [p for p in params if not law.util.multi_match(p, self.skip_parameters)]
class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow, HTCondorWorkflow):
# add to branches
branches.extend(params)
run_command_in_tmp = True
self._cache_branches = True
def __init__(self, *args, **kwargs):
super(PullsAndImpacts, self).__init__(*args, **kwargs)
return branches
self._cache_branches = False
@law.cached_workflow_property(setter=False, empty_value=law.no_value)
def workspace_parameters(self):
......@@ -94,6 +94,22 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow
return law.no_value
return get_workspace_parameters(ws_input.path)
def create_branch_map(self):
# the first branch (index 0) is the nominal fit of the poi
branches = [self.pois[0]]
# only the default method needs additional parameter fits
if self.method == "default":
params = self.get_selected_parameters(self.workspace_parameters)
if params:
# add to branches
branches.extend(params)
# mark that the branch map is cached from now on
self._cache_branches = True
return branches
def workflow_requires(self):
reqs = super(PullsAndImpacts, self).workflow_requires()
reqs["workspace"] = CreateWorkspace.req(self)
......@@ -109,10 +125,20 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow
def output(self):
parts = [self.branch_data]
name = self.join_postfix(["fit", self.get_output_postfix(), parts]) + ".root"
return self.local_target(name)
name = lambda prefix: self.join_postfix([prefix, self.get_output_postfix(), parts]) + ".root"
# the default fit result
outputs = {"result": self.local_target(name("fit"))}
# additional multidimfit file for branch 0 (needs --saveFitResult)
if self.branch == 0:
outputs["multidimfit"] = self.local_target(name("multidimfit"))
return outputs
def build_command(self):
outputs = self.output()
# get the workspace to use and define snapshot args
if self.use_snapshot:
workspace = self.input()["snapshot"].path
......@@ -127,21 +153,46 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow
else:
blinded_args = "--seed {self.branch} --toys {self.toys}".format(self=self)
# define branch dependent options
if self.branch == 0:
# initial fit
# define branch and method dependent options
if self.method == "default":
if self.branch == 0:
# initial fit
branch_opts = (
" --algo singles"
" --saveFitResult"
)
else:
# nuisance fits
branch_opts = (
" --algo impact"
" --parameters {}"
" --floatOtherPOIs 1"
" --saveInactivePOI 1"
).format(self.branch_data)
elif self.method == "hesse":
# setup a single fit
branch_opts = (
" --algo singles"
" --algo none"
" --floatOtherPOIs 1"
" --saveInactivePOI 1"
" --saveFitResult"
)
else:
# nuisance fits
branch_opts = (
" --algo impact"
" --parameters {}"
" --floatOtherPOIs 1"
" --saveInactivePOI 1"
).format(self.branch_data)
raise NotImplementedError
# move statements for saving output files
mv_cmd = (
"mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}"
).format(
self=self,
output=outputs["result"].path,
)
if self.branch == 0:
mv_cmd += (
" && mv multidimfitTest.root {output}"
).format(
output=outputs["multidimfit"].path,
)
# define the basic command
cmd = (
......@@ -160,14 +211,14 @@ class PullsAndImpacts(PullsAndImpactsBase, CombineCommandTask, law.LocalWorkflow
" {self.combine_optimization_args}"
" {branch_opts}"
" && "
"mv higgsCombineTest.MultiDimFit.mH{self.mass_int}.{self.branch}.root {output}"
"{mv_cmd}"
).format(
self=self,
workspace=workspace,
output=self.output().path,
snapshot_args=snapshot_args,
blinded_args=blinded_args,
branch_opts=branch_opts,
mv_cmd=mv_cmd,
)
return cmd
......@@ -217,75 +268,77 @@ class MergePullsAndImpacts(PullsAndImpactsBase):
params = req.workspace_parameters
poi = self.pois[0]
# extract all fit results
# extract all fit results, depending on the method
fit_results = {}
fail_info = []
for b, name in branch_map.items():
inp = inputs[b]
if not inp.exists():
self.logger.warning("input of branch {} at {} does not exist".format(b, inp.path))
continue
tree = inp.load(formatter="uproot")["limit"]
values = tree.arrays([poi] if b == 0 else [poi, name])
# the fit converged when there are 3 values in the parameter array
converged = values[poi if b == 0 else name].size == 3
if not converged:
# when not converged and failures should be kept, change the result
if self.keep_failures:
fit_results[b] = {
name: np.array([np.nan, np.nan, np.nan]),
"r": np.array([np.nan, np.nan, np.nan]),
}
# when not converged and failures should not be kept, raise fail info
if self.method == "default":
# default method: loop through fit result per branch
fail_info = []
for b, name in branch_map.items():
inp = inputs[b]["result"]
if not inp.exists():
self.logger.warning("input {} of branch {} does not exist".format(inp.path, b))
continue
# load the result
values = self.load_default_fit(inp, poi, name, keep_failures=self.keep_failures)
if values:
fit_results[name] = values
else:
fail_info.append((b, name, inp.path))
else:
fit_results[b] = values
# throw an error with instructions when a fit failed
if fail_info:
failing_branches = [b for b, _, _ in fail_info]
working_branches = [b for b in branch_map if b not in failing_branches]
working_branches = law.util.range_join(working_branches, to_str=True)
c = law.util.colored
msg = "{} failing parameter fit(s) detected:\n".format(len(fail_info))
for b, name, _ in fail_info:
msg += " {} (branch {})\n".format(name, b)
msg += "\nBranches: {}".format(",".join(map(str, failing_branches)))
msg += c("\nYou have two options\n\n", style=("underlined", "bright"))
msg += " " + c("1.", "magenta")
msg += " You can try to remove the corresponding output files via\n\n"
for _, _, path in fail_info:
msg += " rm {}\n".format(path)
msg += "\n and then add different options such as\n\n"
msg += c(" --PullsAndImpacts-custom-args='--X-rtd MINIMIZER_no_analytic'\n",
style="bright")
msg += "\n to your 'law run ...' command.\n\n"
msg += " " + c("2.", "magenta")
msg += " You can proceed with the converging fits only by adding\n\n"
msg += c(" --PullsAndImpacts-branches {}\n\n".format(working_branches),
style="bright")
msg += " which effectively skips all failing fits in your case.\n"
raise Exception(msg)
# throw an error with instructions when a fit failed
if fail_info:
failing_branches = [b for b, _, _ in fail_info]
working_branches = [b for b in branch_map if b not in failing_branches]
working_branches = law.util.range_join(working_branches, to_str=True)
c = law.util.colored
msg = "{} failing parameter fit(s) detected:\n".format(len(fail_info))
for b, name, _ in fail_info:
msg += " {} (branch {})\n".format(name, b)
msg += "\nBranches: {}".format(",".join(map(str, failing_branches)))
msg += c("\nYou have two options\n\n", style=("underlined", "bright"))
msg += " " + c("1.", "magenta")
msg += " You can try to remove the corresponding output files via\n\n"
for _, _, path in fail_info:
msg += " rm {}\n".format(path)
msg += "\n and then add different options such as\n\n"
msg += c(" --PullsAndImpacts-custom-args='--X-rtd MINIMIZER_no_analytic'\n",
style="bright")
msg += "\n to your 'law run ...' command.\n\n"
msg += " " + c("2.", "magenta")
msg += " You can proceed with the converging fits only by adding\n\n"
msg += c(" --PullsAndImpacts-branches {}\n\n".format(working_branches),
style="bright")
msg += " which effectively skips all failing fits in your case.\n"
raise Exception(msg)
elif self.method == "hesse":
# load all parameter fits from the mdf result
fit_results = self.load_hesse_fits(inputs[0]["multidimfit"], poi,
[poi] + self.get_selected_parameters(params))
else:
raise NotImplementedError
# merge values and parameter infos into data structure similar to the one produced by
# CombineHarvester the only difference is that two sided impacts are stored as well
data = OrderedDict(method="default")
data = OrderedDict()
# save the method
data["method"] = self.method
# load and store nominal value
data["POIs"] = [{"name": poi, "fit": fit_results[0][poi][[1, 0, 2]].tolist()}]
data["POIs"] = [{"name": poi, "fit": fit_results[poi][poi][[1, 0, 2]].tolist()}]
self.publish_message("read nominal values")
# load and store parameter results
data["params"] = []
for b, vals in fit_results.items():
for name, vals in fit_results.items():
# skip the nominal fit
if b == 0:
if name == poi:
continue
d = OrderedDict()
name = branch_map[b]
# parameter name
d["name"] = name
# parameter pdf type
......@@ -312,6 +365,46 @@ class MergePullsAndImpacts(PullsAndImpactsBase):
self.output().dump(data, indent=4, formatter="json")
@classmethod
def load_default_fit(cls, target, poi, param, keep_failures=False):
tree = target.load(formatter="uproot")["limit"]
values = tree.arrays([poi] if param == poi else [poi, param])
# the fit converged when there are 3 values in the parameter array
converged = values[param].size == 3
if converged:
return values
elif keep_failures:
return {
poi: np.array([np.nan, np.nan, np.nan]),
param: np.array([np.nan, np.nan, np.nan]),
}
# error case
return None
@classmethod
def load_hesse_fits(cls, target, poi, params):
# bluntly copied from CombineHarvester/CombineTools/python/combine/utils.py
with target.load(formatter="root") as f:
rfr = f.Get("fit_mdf")
# check the fit status
if rfr.covQual() != 3:
raise Exception("inaccurate covariance matrix")
res = OrderedDict()
for param in params:
res[param] = OrderedDict()
for p in [poi, param]:
pj = rfr.floatParsFinal().find(p)
vj = pj.getVal()
ej = pj.getError()
c = rfr.correlation(param, p)
res[param][p] = np.array([vj, vj - ej * c, vj + ej * c]) # order changed w.r.t. CH
return res
class PlotPullsAndImpacts(PullsAndImpactsBase, POIPlotTask, BoxPlotTask):
......
......@@ -2,6 +2,6 @@ The `MergePullsAndImpacts` task collects the fit results from the `PullsAndImpac
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,17,18,74,56,28,33,67"
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,74,82,17,18,74,56,28,33,67"
</div>
......@@ -79,3 +79,4 @@
| `--shift-negative-values` | When `True`, dnll2 values are vertically shifted to move the minimum back to zero in case it is negative. Defaults to `False`. |
| `--signal-from-limit` | When `True`, instead of using the best fit value, scale the signal by the upper limit as computed by `UpperLimits`. Defaults to `False`. |
| `--save-ranges` | When `True`, save allowed parameter ranges in an additional output file. Defaults to `False`. |
| `--method` | The fit method, either `default` for single fits per nuisance, or `hesse` for deriving impacts from the Hesse matrix. Defaults to `default`. |
......@@ -2,6 +2,6 @@ The `PlotPullsAndImpacts` task collects fit results from `MergePullsAndImpacts`
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,65,68,56,28,29,33,27,52,30,31,32,3,4,67"
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,82,65,68,56,28,29,33,27,52,30,31,32,3,4,67"
</div>
......@@ -3,6 +3,6 @@ It provides some handy cli parameters to manipulate POIs, ranges and other optio
<div class="dhi_parameter_table">
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,56,28,33,13,34,36"
--8<-- "content/snippets/parameters.md@-2,20,19,16,14,66,48,17,18,74,82,56,28,33,13,34,36"
</div>
Subproject commit 37d9fe4b34514eba2ccdf5231fbedc10acfd6c5f
Subproject commit 1eeed2a9d198746321d7e41823f823cb7bd940ee
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment