Commit f4ffef65 authored by Alexandra Carvalho Antunes De Oliveira

merge conflicts fix

parents 26297988 5635379e
@@ -77,22 +77,23 @@ br_hh_names = DotDict(
bbbb_boosted=r"4b, high $m_{HH}$",
bbbb_boosted_ggf=r"4b #scale[0.75]{high $m_{HH}$, ggF}",
bbbb_boosted_vbf=r"4b #scale[0.75]{high $m_{HH}$, VBF}",
bbvv=r"bbVV",
bbww=r"bbWW",
bbwwqqlv=r"bbWW, qql$\nu$",
bbwwlvlv=r"bbWW, 2l2$\nu$",
bbzz=r"bbZZ",
bbzzqqll=r"bbZZ, qqll",
bbzzllll=r"bbZZ",
bbtt=r"bb$\tau\tau$",
bbgg=r"bb$\gamma\gamma$",
ttww=r"WW$\tau\tau",
ttzz=r"ZZ$\tau\tau",
tttt=r"$\tau\tau\tau\tau$",
wwww=r"WWWW",
zzzz=r"ZZZZ",
wwzz=r"WWZZ",
wwgg=r"WW$\gamma\gamma$",
bbbb_all=r"bb bb",
bbvv=r"bb VV",
bbww=r"b bWW",
bbwwqqlv=r"bb WW, qql$\nu$",
bbwwlvlv=r"bb WW, 2l2$\nu$",
bbzz=r"bb ZZ",
bbzzqqll=r"bb ZZ, qqll",
bbzzllll=r"bb ZZ",
bbtt=r"bb $\tau\tau$",
bbgg=r"bb $\gamma\gamma$",
ttww=r"WW $\tau\tau",
ttzz=r"ZZ $\tau\tau",
tttt=r"$\tau\tau$ $\tau\tau$",
wwww=r"WW WW",
zzzz=r"ZZ ZZ",
wwzz=r"WW ZZ",
wwgg=r"WW $\gamma\gamma$",
multilepton="Multilepton",
)
# aliases
@@ -117,21 +118,18 @@ campaign_labels.update(br_hh_names)
# poi defaults (value, range, points, taken from physics model) and labels
# note: C2V and CV do not follow the kappa notation and are upper case to be consistent with the model
poi_data = DotDict(
r=DotDict(range=(-20.0, 20.0), label="r", sm_value=1.0),
r_gghh=DotDict(range=(-20.0, 20.0), label="r_{gghh}", sm_value=1.0),
r_qqhh=DotDict(range=(-20.0, 20.0), label="r_{qqhh}", sm_value=1.0),
r_vhh=DotDict(range=(-20.0, 20.0), label="r_{vhh}", sm_value=1.0),
kl=DotDict(range=(-30.0, 30.0), label=r"\kappa_{\lambda}", sm_value=1.0),
kt=DotDict(range=(-10.0, 10.0), label=r"\kappa_{t}", sm_value=1.0),
C2V=DotDict(range=(-10.0, 10.0), label=r"\kappa_{2V}", sm_value=1.0),
CV=DotDict(range=(-10.0, 10.0), label=r"\kappa_{V}", sm_value=1.0),
C2=DotDict(range=(-2.0, 3.0), label=r"c_{2}", sm_value=0.0),
CG=DotDict(range=(-2.0, 2.0), label=r"c_{g}", sm_value=0.0),
C2G=DotDict(range=(-2.0, 2.0), label=r"c_{2g}", sm_value=0.0),
r=DotDict(range=(-20.0, 20.0), label=r"$r$", sm_value=1.0),
r_gghh=DotDict(range=(-20.0, 20.0), label=r"$r_{gghh}$", sm_value=1.0),
r_qqhh=DotDict(range=(-20.0, 20.0), label=r"$r_{qqhh}$", sm_value=1.0),
r_vhh=DotDict(range=(-20.0, 20.0), label=r"$r_{vhh}$", sm_value=1.0),
kl=DotDict(range=(-30.0, 30.0), label=r"$\kappa_{\lambda}$", sm_value=1.0),
kt=DotDict(range=(-10.0, 10.0), label=r"$\kappa_{t}$", sm_value=1.0),
C2V=DotDict(range=(-10.0, 10.0), label=r"$\kappa_{2V}$", sm_value=1.0),
CV=DotDict(range=(-10.0, 10.0), label=r"$\kappa_{V}$", sm_value=1.0),
C2=DotDict(range=(-2.0, 3.0), label=r"$C_{2}$", sm_value=0.0),
CG=DotDict(range=(-2.0, 2.0), label=r"$C_{g}$", sm_value=0.0),
C2G=DotDict(range=(-2.0, 2.0), label=r"$C_{2g}$", sm_value=0.0),
)
# add "$" embedded labels
for poi, data in poi_data.items():
data["label_math"] = "${}$".format(data.label)
# colors
colors = DotDict(
@@ -161,6 +159,8 @@ colors = DotDict(
blue_signal=(67, 118, 201),
blue_signal_trans=(67, 118, 201, 0.5),
purple=881,
brazil_yellow=800, # kOrange
brazil_green=417, # kGreen + 1
),
)
......
@@ -16,7 +16,9 @@ from collections import OrderedDict, defaultdict
import law
import six
from dhi.util import import_ROOT, real_path, multi_match, copy_no_collisions, TFileCache
from dhi.util import (
import_ROOT, real_path, multi_match, copy_no_collisions, TFileCache, prepare_output,
)
#: Parameter directives excluding groups, autoMCStats and nuisance edit lines.
@@ -593,10 +595,7 @@ def manipulate_datacard(datacard, target_datacard=None, read_only=False, read_st
# prepare the target location when given
if target_datacard:
target_datacard = real_path(target_datacard)
target_dirname = os.path.dirname(target_datacard)
if not os.path.exists(target_dirname):
os.makedirs(target_dirname)
target_datacard = prepare_output(target_datacard)
# prepare the writer
if writer == "simple":
@@ -862,13 +861,9 @@ def bundle_datacard(datacard, directory, shapes_directory=".", skip_shapes=False
datacard is returned.
"""
# prepare the directories
directory = real_path(directory)
if not os.path.exists(directory):
os.makedirs(directory)
directory = prepare_output(directory, is_dir=True)
shapes_directory_relative = not shapes_directory.startswith("/")
shapes_directory = real_path(os.path.join(directory, shapes_directory or "."))
if not os.path.exists(shapes_directory):
os.makedirs(shapes_directory)
shapes_directory = prepare_output(os.path.join(directory, shapes_directory or "."), is_dir=True)
# copy the card itself
src_datacard = real_path(datacard)
......
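Both hunks above swap the manual real_path + os.makedirs pattern for prepare_output from dhi.util, which is also added to the import line. The helper's body is not part of this diff; a minimal sketch of the assumed behavior (resolve the path, create any missing directory, return the resolved path) could look like:

import os

def prepare_output(path, is_dir=False):
    # assumed behavior, not the actual dhi.util implementation:
    # resolve user/env parts of the path, then create the missing directory
    path = os.path.realpath(os.path.expandvars(os.path.expanduser(path)))
    dirname = path if is_dir else os.path.dirname(path)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    return path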
# coding: utf-8
"""
Collection of lightweight, functional helpers to create HEPData entries.
"""
from collections import OrderedDict
import yaml
from scinum import Number, create_hep_data_representer
from dhi.util import DotDict, import_ROOT, prepare_output, create_tgraph
from dhi.plots.util import get_graph_points
#
# setup and general helpers
#
# configure yaml to transparently encode OrderedDict and DotDict instances like normal dicts
map_repr = lambda dumper, data: dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
yaml.add_representer(OrderedDict, map_repr)
yaml.add_representer(DotDict, map_repr)
# configure yaml to convert scinum.Number objects with uncertainties to HEPData value structs
yaml.add_representer(Number, create_hep_data_representer())
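# with these representers in place, ordered mappings serialize as plain YAML maps rather than
# tagged Python objects; an illustrative check:
#
#   doc = OrderedDict([("independent_variables", []), ("dependent_variables", [])])
#   print(yaml.dump(doc, default_flow_style=False))
#   # independent_variables: []
#   # dependent_variables: []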
class Dumper(yaml.Dumper):
"""
Custom dumper class ensuring that sequence items are indented.
"""
def increase_indent(self, *args, **kwargs):
kwargs["indentless"] = False
return super(Dumper, self).increase_indent(*args, **kwargs)
def save_hep_data(data, path, **kwargs):
# default configs
kwargs.setdefault("indent", 2)
# forced configs
kwargs["Dumper"] = Dumper
# prepare the output
path = prepare_output(path)
# dump
with open(path, "w") as f:
yaml.dump(data, f, **kwargs)
print("written HEPData file to {}".format(path))
return path
#
# structured data creators
#
def create_hist_data(independent_variables=None, dependent_variables=None):
# create the entry
data = OrderedDict()
# add placeholders for in/dependent variables
data["independent_variables"] = independent_variables or []
data["dependent_variables"] = dependent_variables or []
return data
def create_independent_variable(label, unit=None, values=None, parent=None):
v = OrderedDict()
# header
v["header"] = OrderedDict({"name": label})
if unit is not None:
v["header"]["units"] = unit
# values
v["values"] = values or []
# add to parent hist data
if parent is not None:
parent["independent_variables"].append(v)
return v
def create_dependent_variable(label, unit=None, qualifiers=None, values=None, parent=None):
v = OrderedDict()
# header
v["header"] = OrderedDict({"name": label})
if unit is not None:
v["header"]["units"] = unit
# qualifiers
if qualifiers is not None:
v["qualifiers"] = qualifiers
# values
v["values"] = values or []
# add to parent hist data
if parent is not None:
parent["dependent_variables"].append(v)
return v
def create_qualifier(name, value, unit=None, parent=None):
q = OrderedDict()
# name and value
q["name"] = name
q["value"] = value
# unit
if unit is not None:
q["units"] = unit
# add to parent dependent variable
if parent is not None:
parent.setdefault("qualifiers", []).append(q)
return q
def create_range(start, stop, parent=None):
r = OrderedDict([("low", start), ("high", stop)])
# add to parent values
if parent is not None:
parent.append(r)
return r
def create_value(value, errors=None, parent=None):
v = OrderedDict()
v["value"] = value
if errors is not None:
v["errors"] = errors
# add to parent values
if parent is not None:
parent.append(v)
return v
def create_error(value, label=None, parent=None):
e = OrderedDict()
# error values
if isinstance(value, (list, tuple)) and len(value) == 2:
e["asymerror"] = OrderedDict([("plus", value[0]), ("minus", value[1])])
else:
e["symerror"] = value
# label
if label is not None:
e["label"] = label
# add to parent value
if parent is not None:
parent.setdefault("errors", []).append(e)
return e
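Together, the creators above assemble a HEPData table from scratch and hand it to save_hep_data. A short usage sketch (the labels, numbers and output file name are made up for illustration):

# one mass range on the independent axis, one observed value with an asymmetric error
data = create_hist_data()
iv = create_independent_variable(r"$m_{HH}$", unit="GeV", parent=data)
create_range(250.0, 300.0, parent=iv["values"])
dv = create_dependent_variable("Observed limit", unit="fb", parent=data)
create_qualifier("SQRT(S)", 13, unit="TeV", parent=dv)
val = create_value(0.85, parent=dv["values"])
create_error((0.10, -0.08), label="total", parent=val)
save_hep_data(data, "hepdata_limits.yaml")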
#
# adapters
#
def create_independent_variable_from_x_axis(x_axis, label=None, unit=None, parent=None,
transform=None):
# default transform
if not callable(transform):
transform = lambda bin_number, low, high: (low, high)
# default label
if label is None:
label = x_axis.GetTitle()
# extract bin ranges
values = []
for b in range(1, x_axis.GetNbins() + 1):
low, high = x_axis.GetBinLowEdge(b), x_axis.GetBinUpEdge(b)
# transform
low, high = transform(b, low, high)
# add a new value
values.append(create_range(low, high))
return create_independent_variable(label, unit=unit, values=values, parent=parent)
def create_dependent_variable_from_hist(hist, label=None, unit=None, qualifiers=None, parent=None,
error_label=None, rounding_method=None, transform=None):
# default transform
if not callable(transform):
transform = lambda bin_number, value, error: (value, error)
# default label
if label is None:
label = hist.GetTitle()
# get values
values = []
for b in range(1, hist.GetXaxis().GetNbins() + 1):
v = hist.GetBinContent(b)
# extract the error
err = {}
if error_label:
err_u = hist.GetBinErrorUp(b)
err_d = hist.GetBinErrorLow(b)
err[error_label] = abs(err_u) if err_u == err_d else (err_u, err_d)
# transform if required, create a number object, and add it
v, err = transform(b, v, err)
num = Number(v, err, default_format=rounding_method)
values.append(num)
return create_dependent_variable(label, unit=unit, qualifiers=qualifiers, values=values,
parent=parent)
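The two histogram adapters compose the same way; assuming a ROOT TH1 called hist (hypothetical here), its binning and bin contents map onto one table:

# bin edges become ranges, bin contents become values with "stat" errors
data = create_hist_data()
create_independent_variable_from_x_axis(
    hist.GetXaxis(), label=r"$m_{HH}$", unit="GeV", parent=data,
)
create_dependent_variable_from_hist(
    hist, label="Events", error_label="stat", parent=data,
)
save_hep_data(data, "hepdata_distribution.yaml")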
def create_dependent_variable_from_graph(graph, label=None, unit=None, qualifiers=None, parent=None,
coord="y", x_values=None, error_label=None, rounding_method=None, transform=None):
ROOT = import_ROOT()
# checks
if coord not in ["x", "y"]:
raise Exception("coord must be 'x' or 'y', got {}".format(coord))
# default transform
if not callable(transform):
transform = lambda index, x_value, y_value, error: (x_value, y_value, error)
# default label
if label is None:
label = graph.GetTitle()
# helper to create splines for interpolation
def make_spline(x, y):
# delete repeated horizontal endpoints which lead to interpolation failures
x, y = list(x), list(y)
if len(x) > 1 and x[0] == x[1]:
x, y = x[1:], y[1:]
if len(x) > 1 and x[-1] == x[-2]:
x, y = x[:-1], y[:-1]
return ROOT.TSpline3("spline", create_tgraph(len(x), x, y), "", x[0], x[-1])
# build values dependent on the coordinate to extract
values = []
if coord == "x":
# when x coordinates are requested, just get graph values and optionally obtain errors
gx, gy = get_graph_points(graph)
for i, (x, y) in enumerate(zip(gx, gy)):
x, y = float(x), float(y)
# extract the error
err = {}
if error_label:
err_u = graph.GetErrorXhigh(i)
err_d = graph.GetErrorXlow(i)
err[error_label] = abs(err_u) if err_u == err_d else (err_u, err_d)
# transform if required, create a number object, and add it
x, y, err = transform(i, x, y, err)
num = Number(x, err, default_format=rounding_method)
values.append(num)
else: # coord == "y"
# when y coordinates are requested, consider custom x values and use interpolation splines
# for both nominal values and errors
points = get_graph_points(graph, errors=True)
gx, gy, errors = points[0], points[1], points[2:]
has_errors, has_asym_errors = len(errors) > 0, len(errors) > 2
spline = make_spline(gx, gy)
if error_label and has_errors:
if has_asym_errors:
spline_err_u = make_spline(gx, errors[3])
spline_err_d = make_spline(gx, errors[2])
else:
spline_err = make_spline(gx, errors[1])
# determine x values to scan
if x_values is None:
x_values = gx
for i, x in enumerate(x_values):
x = float(x)
y = spline.Eval(x)
# extract the error
err = {}
if error_label and has_errors:
if has_asym_errors:
err[error_label] = (spline_err_u.Eval(x), spline_err_d.Eval(x))
else:
err[error_label] = spline_err.Eval(x)
# transform if required, create a number object, and add it
x, y, err = transform(i, x, y, err)
num = Number(y, err, default_format=rounding_method)
values.append(num)
return create_dependent_variable(label, unit=unit, qualifiers=qualifiers, values=values,
parent=parent)
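For graphs, y values and band errors can be sampled at custom x positions through the interpolation splines above. A hedged sketch, assuming a TGraphAsymmErrors named graph holding an expected limit band (the label strings and scan points are made up):

# sample the expected limit and its band at fixed kappa_lambda points
data = create_hist_data()
kl_points = [-10.0 + i for i in range(21)]
iv = create_independent_variable(r"$\kappa_{\lambda}$", parent=data)
iv["values"] = [create_value(x) for x in kl_points]
create_dependent_variable_from_graph(
    graph, label="Expected limit", unit="fb", parent=data,
    coord="y", x_values=kl_points, error_label="68% expected",
)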
@@ -19,7 +19,7 @@ import scipy.interpolate
from dhi.tasks.combine import DatacardTask
from dhi.tasks.limits import UpperLimits
from dhi.util import real_path, get_dcr2_path
from dhi.util import real_path, get_dcr2_path, round_digits
def _is_r2c_bbbb_boosted_ggf(task):
@@ -121,13 +121,6 @@ def _get_limit_grid_interps(poi, scan_name):
return _limit_grid_interps[key]
def round_digits(v, n, round_fn=round):
if not v:
return v
exp = int(math.floor(math.log(abs(v), 10)))
return round_fn(v / 10.0**(exp - n + 1)) * 10**(exp - n + 1)
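# round_digits is removed here and now comes from dhi.util (see the import change above);
# it rounds v to n significant digits. A few illustrative values, up to floating point
# representation:
#   round_digits(0.04621, 2)                     -> 0.046
#   round_digits(1234.5, 2)                      -> 1200.0
#   round_digits(0.04621, 2, round_fn=math.ceil) -> 0.047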
def define_limit_grid(task, scan_parameter_values, approx_points, debug=False):
"""
Hook called by :py:class:`tasks.limits.UpperLimit` to define a scan-parameter-dependent grid of
@@ -247,37 +240,16 @@ def scale_multi_likelihoods(task, data):
import numpy as np
from dhi.plots.likelihoods import _preprocess_values, evaluate_likelihood_scan_1d
# projection setup
# skip the projection when the env variable is not set
setup = os.getenv("DHI_NLL_PROJECTION", None)
add_stat = False
replace = False
set_scalings = False
if setup == "hllhc":
projection, r = "3000 fb^{-1}", 3000.0 / 138.0
elif setup == "hllhc_stat":
projection, r, add_stat = "3000 fb^{-1}", 3000.0 / 138.0, True
elif setup == "run2_cms_atlas":
projection, r = "Run 2 CMS+Atlas", 2.0
elif setup == "run2_3_cms":
projection, r = "Run 2 + 3", (138.0 + 160.0) / 138.0
elif setup == "run2_3b_cms":
projection, r = "Run 2 + 3 (4y)", (138.0 + 260.0) / 138.0
elif setup:
raise Exception("scale_multi_likelihoods hook encountered unkown setup '{}'".format(setup))
else:
if not setup:
return
if not set_scalings:
scale_stat = r**-0.5
scale_thy = 1.0
scale_exp = 1.0
projection, r = "3000 fb^{-1}", 3000.0 / 138.0
print("")
print(" Run projection ".center(100, "-"))
print("name: {}".format(projection))
print("r : {}".format(r))
print("stat: {}".format(scale_stat))
print("thy : {}".format(scale_thy))
print("exp : {}".format(scale_exp))
print("name : {}".format(projection))
print("r : {}".format(r))
print("")
# input checks
@@ -289,15 +261,16 @@ def scale_multi_likelihoods(task, data):
raise Exception("scale_multi_likelihoods hook only supports POIs r,kl,C2V, got {}".format(
poi))
# the stat+exp scan is only needed when scale_thy != scale_exp
need_statexp = scale_thy != scale_exp
# get values for all systematics and stat. only
# at this point we must anticipate them at positions 0 and 1
assert len(data) == 3
values_all = data[0]["values"]
values_stat = data[1]["values"]
if need_statexp:
values_statexp = data[2]["values"]
values_statexp = data[2]["values"]
# remove run stat+exp
del data[2:]
task.datacard_names = task.datacard_names[:2]
# helper to obtain scan information
def get_scan(values, name):
@@ -309,57 +282,96 @@
# get the full and stat. only scans
scan_all = get_scan(values_all, "all")
scan_stat = get_scan(values_stat, "stat")
if need_statexp:
scan_statexp = get_scan(values_statexp, "statext")
scan_statexp = get_scan(values_statexp, "statext")
# get errors that parametrize the scaling factor
# when available, use 2 sigma estimators
sigma_idx = 1 if None not in scan_all["summary"]["uncertainty"][1] else 0
print("\nbuilding scaling factor from +-{} sigma uncertainties".format(sigma_idx + 1))
err_a_up, err_a_down = scan_all["summary"]["uncertainty"][sigma_idx]
err_b_up, err_b_down = scan_stat["summary"]["uncertainty"][sigma_idx]
err_f_up, err_f_down = 0, 0
if need_statexp:
err_f_up, err_f_down = scan_statexp["summary"]["uncertainty"][sigma_idx]
if None in [err_a_up, err_a_down, err_b_up, err_b_down, err_f_down, err_f_up]:
err_all_u, err_all_d = scan_all["summary"]["uncertainty"][sigma_idx]
err_stat_u, err_stat_d = scan_stat["summary"]["uncertainty"][sigma_idx]
err_statexp_u, err_statexp_d = scan_statexp["summary"]["uncertainty"][sigma_idx]
if None in [err_all_u, err_all_d, err_stat_u, err_stat_d, err_statexp_d, err_statexp_u]:
raise Exception("scale_multi_likelihoods hook encountered missing {} sigma interval".format(
sigma_idx + 1))
# build the scaling factors separately for both sides of the curve
x_a = scale_thy**2
x_b = scale_stat**2 - scale_exp**2
x_f = scale_exp**2 - scale_thy**2
s_up = err_a_up**2 / (err_a_up**2 * x_a + err_b_up**2 * x_b + err_f_up**2 * x_f)
s_down = err_a_down**2 / (err_a_down**2 * x_a + err_b_down**2 * x_b + err_f_down**2 * x_f)
# copy and scale values
values_projected = np.copy(values_stat if poi == "r" else values_all)
poi_min = (scan_stat if poi == "r" else scan_all)["poi_min"]
up_mask = values_projected[poi] >= poi_min
values_projected["dnll2"][up_mask] *= s_up
values_projected["dnll2"][~up_mask] *= s_down
# replace existing curves if requested
if replace:
del data[:]
task.datacard_names = tuple()
# add a new entry
data.append({
"values": values_projected,
"poi_min": poi_min,
})
task.datacard_names += (projection,)
# also add a stat. only curve if requested
if add_stat:
values_projected_stat = np.copy(values_stat)
values_projected_stat["dnll2"] *= r
def add_projection(label, scale_stat, scale_exp, scale_thy):
assert err_all_u > err_statexp_u > err_stat_u
assert err_all_d <= err_statexp_d <= err_stat_d
# when the systematics scale is low, one should rather scale the run 2 stat only
# curve, but when systematics are assumed to be unchanged (e.g. 1.0), scale the full run 2;
# therefore, use an average between the two based on the overall systematic scale;
# to compute this, create a weighted mean of the exp and thy scales using errors as weights
err_thy_u = (err_all_u**2 - err_statexp_u**2)**0.5
err_thy_d = (err_all_d**2 - err_statexp_d**2)**0.5
err_exp_u = (err_statexp_u**2 - err_stat_u**2)**0.5
err_exp_d = (err_statexp_d**2 - err_stat_d**2)**0.5
# weighted average
scale_syst_u = (err_thy_u * scale_thy + err_exp_u * scale_exp) / (err_thy_u + err_exp_u)
scale_syst_d = (err_thy_d * scale_thy + err_exp_d * scale_exp) / (err_thy_d + err_exp_d)
# for smoothing between them, consider the two values spanning an ellipse and get the point
# on the circumference with the syst scale (between 0.0 and 1.0) being interpreted as an
# angle (between 0 and pi/2)