Commit bdeb2ce0 authored by Long Wang

adding VHH class

parent 4e9b83c1
Pipeline #2494083 skipped
......@@ -177,7 +177,7 @@ class VBFHHFormula:
#########################################
class HHModel(PhysicsModel):
class HHHModel(PhysicsModel):
"""
Models the HH production as a linear sum of the input components for VBF (>= 6) and GGF (>= 3).
The following physics options are supported:
......@@ -609,6 +609,322 @@ class HHModel(PhysicsModel):
raise Exception("\n".join(msg))
class VHHModel(PhysicsModel):
def __init__(self, vhh_sample_list, name):
PhysicsModel.__init__(self)
self.doNNLOscaling = None
self.doBRscaling = None
self.doHscaling = None
self.doklDependentUnc = None
self.doProfilekl = None
self.doProfilekt = None
self.doProfileCV = None
self.doProfileC2V = None
self.klUncName = "THU_HH"
self.name = name
self.check_validity_vhh(vhh_sample_list)
self.vhh_formula = VBFHHFormula(vhh_sample_list)
self.scalingMap = defaultdict(list)
self.dump_inputs()
def setPhysicsOptions(self, physOptions):
opts = [opt.split("=", 1) for opt in physOptions if "=" in opt]
known_flags = ["doNNLOscaling", "doBRscaling", "doHscaling", "doklDependentUnc"]
known_params = ["doProfilekl", "doProfilekt", "doProfileCV", "doProfileC2V"]
for key, value in opts:
# identify boolean flags
if key in known_flags:
flag = value.lower() in ["yes", "true", "1"]
setattr(self, key, flag)
print("[INFO] set {} of model {} to {}".format(key, self.name, flag))
continue
# identify remaining "key=value" parameter pairs
if key in known_params:
# special case: interpret empty string and "None" as actual None
if value.lower() in ("", "none"):
value = None
setattr(self, key, value)
print("[INFO] set {} of model {} to {}".format(key, self.name, value))
continue
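# Illustrative only (model name made up): this mirrors how combine forwards "--PO key=value"
# pairs to setPhysicsOptions:
#   model = create_VHHmodel("model_vhh_example")
#   model.setPhysicsOptions(["doBRscaling=True", "doklDependentUnc=1", "doProfilekl=flat"])
# boolean flags end up as True/False, profiling options keep their string value
# ("flat" or "gauss,<width>") or remain None.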
def check_validity_vhh(self, vhh_sample_list):
if not isinstance(vhh_sample_list, (list, tuple)):
raise RuntimeError("%s : malformed VHH input sample list - expect list or tuple" % self.name)
if len(vhh_sample_list) < 6:
raise RuntimeError("%s : malformed VHH input sample list - expect at least 6 samples" % self.name)
for s in vhh_sample_list:
if not isinstance(s, VBFHHSample):
raise RuntimeError("%s : malformed VHH input sample list - each element must be a VBFHHSample" % self.name)
def dump_inputs(self):
print "[INFO] VHH model : " , self.name
print "...... VHH configuration"
for i,s in enumerate(self.vhh_formula.sample_list):
print " {0:<3} ... CV : {1:<3}, C2V : {2:<3}, kl : {3:<3}, xs : {4:<3.8f} pb, label : {5}".format(i, s.val_CV, s.val_C2V, s.val_kl, s.val_xs, s.label)
def doParametersOfInterest(self):
## the model is built with:
##   r x [VHH]
##   VHH = sum samples(CV, C2V, kl)
# add the overall rate POI
self.modelBuilder.doVar("r[1,-20,20]")
#self.modelBuilder.doVar("r_gghh[1,-20,20]")
#self.modelBuilder.doVar("r_qqhh[1,-20,20]")
#self.modelBuilder.out.var("r_gghh").setConstant(True)
#self.modelBuilder.out.var("r_qqhh").setConstant(True)
pois = ["r"]
# define kappa parameters and their uniform ranges
kappas = OrderedDict([
("kl", (-30, 30)),
("CV", (-10, 10)),
("C2V", (-10, 10)),
])
# add them
for name, (start, stop) in kappas.items():
# define the variable
self.modelBuilder.doVar("{}[1,{},{}]".format(name, start, stop))
# only make it a POI when it is not profiled
do_profile = name == "kl" and bool(self.doProfilekl)
do_profile |= name == "CV" and bool(self.doProfileCV)
do_profile |= name == "C2V" and bool(self.doProfileC2V)
if not do_profile:
self.modelBuilder.out.var(name).setConstant(True)
pois.append(name)
# define the POI group
self.modelBuilder.doSet("POI", ",".join(pois))
print("using POIs {}".format(",".join(pois)))
# set or redefine the MH variable on which some of the BRs depend
if not self.options.mass:
raise Exception("invalid mass value '{}', please provide a valid value using the "
"--mass option".format(self.options.mass))
if self.modelBuilder.out.var("MH"):
self.modelBuilder.out.var("MH").removeRange()
self.modelBuilder.out.var("MH").setVal(self.options.mass)
else:
self.modelBuilder.doVar("MH[%f]" % self.options.mass)
self.modelBuilder.out.var("MH").setConstant(True)
# add objects for kl dependent theory uncertainties
if self.doklDependentUnc:
self.makeklDepTheoUncertainties()
# create cross section scaling functions
self.create_scalings()
def preProcessNuisances(self, nuisances):
''' this method is executed before nuisances are processed'''
if self.doklDependentUnc:
nuisances.append((self.klUncName, False, "param", ["0", "1"], []))
# enable profiling of kappas with a configurable prior
for name in ["kl", "CV", "C2V"]:
value = getattr(self, "doProfile" + name)
if not value:
continue
# get the prior and add it
prior, value = value.split(",", 1) if "," in value else (value, None)
if prior == "flat":
self.modelBuilder.DC.flatParamNuisances[name] = True
print("adding flat prior for parameter {}".format(name))
elif prior == "gauss":
nuisances.append((name, False, "param", ["1", value, "[-7,7]"], []))
print("adding gaussian prior for parameter {} with width {}".format(name, value))
else:
raise Exception("unknown prior '{}' for parameter {}".format(prior, name))
def makeInterpolation(self, nameout, nameHi, nameLo, x):
# as in https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/blob/102x/interface/ProcessNormalization.h
## maybe we can try to reuse that implementation; it should be fast
d = {"name":nameout, "hi":nameHi, "lo":nameLo, 'x':x}
d['logKhi'] = "log({hi})".format(**d)
d['logKlo'] = "-log({lo})".format(**d)
d['avg'] = "0.5*({logKhi} + {logKlo})".format(**d)
d['halfdiff'] = "0.5*({logKhi} - {logKlo})".format(**d)
d["twox"] = "2*{x}".format(**d)
d["twox2"] = "({twox})*({twox})".format(**d)
d['alpha'] = '0.125 * {twox} * ({twox2} * ({twox2} - 10.) + 15.)'.format(**d)
d['retCent'] = "{avg}+{alpha}*{halfdiff}".format(**d)
d['retLow'] = d['logKlo']
d['retHigh'] = d['logKhi']
d['retFull'] = "{x} <= -0.5 ? ({retLow}) : {x} >= 0.5 ? ({retHigh}) : ({retCent})".format(**d)
d['ret'] = 'expr::{name}("exp({retFull})",{{{hi},{lo},{x}}})'.format(**d)
# print "[DEBUG]","[makeInterpolation]","going to build: ",d['ret']
self.modelBuilder.factory_(d['ret'])
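# Standalone cross-check of the interpolation built above (plain Python, not used by the model;
# it mirrors retFull: logKlo carries a minus sign and the central region is a smooth polynomial):
def _interp_sketch(hi, lo, x):
    import math
    log_hi, log_lo = math.log(hi), -math.log(lo)
    if x <= -0.5:
        ret = log_lo
    elif x >= 0.5:
        ret = log_hi
    else:
        twox = 2. * x
        twox2 = twox * twox
        alpha = 0.125 * twox * (twox2 * (twox2 - 10.) + 15.)
        ret = 0.5 * (log_hi + log_lo) + alpha * 0.5 * (log_hi - log_lo)
    return math.exp(ret)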
def makeklDepTheoUncertainties(self):
''' Construct and import uncertainties into the workspace'''
#upper_unc[kl] = Max[72.0744-51.7362*kl+11.3712*kl2, 70.9286-51.5708*kl+11.4497*kl2] in fb.
#lower_unc[kl] = Min[66.0621-46.7458*kl+10.1673*kl2, 66.7581-47.721*kl+10.4535*kl2] in fb.
# if not self.doklDependentUnc: return
self.modelBuilder.doVar("%s[-7,7]" % self.klUncName)
self.modelBuilder.factory_('expr::%s_kappaHi("max(72.0744-51.7362*@0+11.3712*@0*@0,70.9286-51.5708*@0+11.4497*@0*@0) / (70.3874 - 50.4111*@0 + 11.0595*@0*@0)",kl)' % self.klUncName)
self.modelBuilder.factory_('expr::%s_kappaLo("min(66.0621-46.7458*@0+10.1673*@0*@0,66.7581-47.721*@0+10.4535*@0*@0) / (70.3874 - 50.4111*@0 + 11.0595*@0*@0)",kl)' % self.klUncName)
self.makeInterpolation("%s_kappa" % self.klUncName, "%s_kappaHi" % self.klUncName, "%s_kappaLo" % self.klUncName , self.klUncName)
## make scaling
self.modelBuilder.factory_("expr::scaling_{name}(\"pow(@0,@1)\",{name}_kappa,{name})".format(name=self.klUncName))
def create_scalings(self):
"""
Create the functions that scale the >= 6 components of VHH production,
as well as the single Higgs and BR scalings.
"""
self.HBRscal = HBRscaler(self.modelBuilder, self.doBRscaling, self.doHscaling)
self.f_r_vhh_names = [] # the RooFormulae that scale the components (VHH)
def pow_to_mul_string(expr):
""" Convert integer powers in an expression to Muls, like a**2 => a*a. Returns a string """
pows = list(expr.atoms(Pow))
if any(not e.is_Integer for b, e in (i.as_base_exp() for i in pows)):
raise ValueError("A power contains a non-integer exponent")
s = str(expr)
repl = zip(pows, (Mul(*[b]*e,evaluate=False) for b,e in (i.as_base_exp() for i in pows)))
for fr, to in repl:
s = s.replace(str(fr), str(to))
return s
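# usage sketch for the helper above (it only exists locally inside create_scalings; the call is
# purely illustrative and assumes sympy symbols matching the coupling names):
#   from sympy import symbols
#   CV, C2V, kl = symbols("CV C2V kl")
#   pow_to_mul_string(CV**2 * kl - 2 * CV * C2V)
#   # -> the same expression as a string with "CV**2" rewritten as "CV*CV", parseable by expr::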
### loop on the VHH scalings
for i, s in enumerate(self.vhh_formula.sample_list):
# f_name = 'f_vbfhhscale_sample_{0}'.format(i)
f_name = 'f_vhhhhscale_sample_{0}'.format(s.label)
f_expr = self.vhh_formula.coeffs[i] # the function that multiplies each sample
# print f_expr
# for ROOFit, this will convert expressions as a**2 to a*a
s_expr = pow_to_mul_string(f_expr)
couplings_in_expr = []
if 'CV' in s_expr: couplings_in_expr.append('CV')
if 'C2V' in s_expr: couplings_in_expr.append('C2V')
if 'kl' in s_expr: couplings_in_expr.append('kl')
# no constant expressions are expected
if len(couplings_in_expr) == 0:
raise RuntimeError('VHH : scaling expression contains none of the couplings CV, C2V, kl')
for idx, ce in enumerate(couplings_in_expr):
# print '..replacing', ce
symb = '@{}'.format(idx)
s_expr = s_expr.replace(ce, symb)
arglist = ','.join(couplings_in_expr)
exprname = 'expr::{}(\"{}\" , {})'.format(f_name, s_expr, arglist)
# print exprname
self.modelBuilder.factory_(exprname) # the function that scales each VHH sample
f_prod_name = f_name + '_r'
prodname = 'prod::{}(r,{})'.format(f_prod_name, f_name)
self.modelBuilder.factory_(prodname) ## the function that scales this production mode
# self.modelBuilder.out.function(f_prod_name).Print("") ## will just print out the values
self.f_r_vhh_names.append(f_prod_name) #bookkeep the scaling that has been created
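# For orientation, for the sample labelled VHHTo4B_CV_1_0_C2V_1_0_C3_1_0 the two factory calls
# above produce strings of the following shape (the polynomial itself depends on the sample
# basis and is only sketched here):
#   expr::f_vhhhhscale_sample_VHHTo4B_CV_1_0_C2V_1_0_C3_1_0("<polynomial in @0,@1,@2>" , CV,C2V,kl)
#   prod::f_vhhhhscale_sample_VHHTo4B_CV_1_0_C2V_1_0_C3_1_0_r(r,f_vhhhhscale_sample_VHHTo4B_CV_1_0_C2V_1_0_C3_1_0)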
def getYieldScale(self, bin, process):
def find_hh_matches(samples, kind):
# get the matching hh sample index
matching_indices = []
for i, sample in enumerate(samples):
if process.startswith(sample.label):
matching_indices.append(i)
# complain when there is more than one hit
if len(matching_indices) > 1:
raise Exception("found {} matches for {} signal process {} in bin {}".format(
len(matching_indices), kind, process, bin))
# return the index when there is only one hit
if len(matching_indices) == 1:
return matching_indices[0]
# otherwise, return nothing
return None
# vhh match?
isample = find_hh_matches(self.vhh_formula.sample_list, "VHH")
if isample is not None:
self.scalingMap[process].append((isample, "VHH"))
scaling = self.f_r_vhh_names[isample]
# when the BR scaling is enabled, try to extract the decays from the process name
if self.doBRscaling:
scaling = self.HBRscal.buildXSBRScalingHH(scaling, process) or scaling
return scaling
# complain when the process is a signal but no sample matched
if self.DC.isSignal.get(process):
raise Exception("signal process {} did not match any VHH samples in bin {}".format(
process, bin))
# single H match?
if self.doHscaling:
scaling = self.HBRscal.findSingleHMatch(process)
# when the BR scaling is enabled, try to extract the decay from the process name
if scaling and self.doBRscaling:
scaling = self.HBRscal.buildXSBRScalingH(scaling, process) or scaling
return scaling or 1.
# at this point we are dealing with a background process that is also not single-H-scaled,
# so it is safe to return 1 since any misconfiguration should have been raised already
return 1.
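# Example of the lookup above: a signal process named "VHHTo4B_CV_1_0_C2V_1_0_C3_1_0" (see the
# vhh_samples list below) matches the sample with that label and is scaled by
# "f_vhhhhscale_sample_VHHTo4B_CV_1_0_C2V_1_0_C3_1_0_r", while plain background processes return 1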
def done(self):
super(VHHModel, self).done()
# get the labels of the VHH samples and store a flag to check if they were matched
matches_vhh = OrderedDict((s.label, []) for s in self.vhh_formula.sample_list)
# go through the scaling map and match to samples
for sample_name in self.scalingMap:
if not sample_name.startswith("VHHTo4B_"):
continue
matches = matches_vhh
for sample_label in matches:
if sample_name.startswith(sample_label):
matches[sample_label].append(sample_name)
break
# print matches
max_len = max(len(label) for label in list(matches_vhh.keys()))
print("Matching signal samples:")
for label, names in list(matches_vhh.items()):
print(" {}{} -> {}".format(label, " " * (max_len - len(label)), ", ".join(names)))
# complain about samples that were not matched by any process
unmatched_vhh_samples = [label for label, names in matches_vhh.items() if not names]
msg = []
if len(unmatched_vhh_samples) not in [0, len(self.vhh_formula.sample_list)]:
msg.append("{} VHH signal samples were not matched by any process: {}".format(
len(unmatched_vhh_samples), ", ".join(unmatched_vhh_samples)))
if msg:
raise Exception("\n".join(msg))
# ggf samples with keys (kl, kt), ordered by kl
# cross section values are NLO with k-factor applied and only used in create_ggf_xsec_func below
ggf_samples = OrderedDict([
......@@ -630,6 +946,16 @@ vbf_samples = OrderedDict([
((1.5, 1, 1), VBFHHSample(1.5, 1, 1, val_xs=0.0660185, label="qqHH_CV_1p5_C2V_1_kl_1")),
])
# vhh samples with keys (CV, C2V, kl): the SM point first, followed by single-coupling variations
vhh_samples = OrderedDict([
((1, 1, 1), VBFHHSample(1, 1, 1, val_xs=0.0017260, label="VHHTo4B_CV_1_0_C2V_1_0_C3_1_0")),
((1, 0, 1), VBFHHSample(1, 0, 1, val_xs=0.0046089, label="VHHTo4B_CV_1_0_C2V_0_0_C3_1_0")),
((0.5, 1, 1), VBFHHSample(0.5, 1, 1, val_xs=0.0014228, label="VHHTo4B_CV_0_5_C2V_1_0_C3_1_0")),
((1.5, 1, 1), VBFHHSample(1.5, 1, 1, val_xs=0.0270800, label="VHHTo4B_CV_1_5_C2V_1_0_C3_1_0")),
((1, 2, 1), VBFHHSample(1, 2, 1, val_xs=0.0142178, label="VHHTo4B_CV_1_0_C2V_2_0_C3_1_0")),
((1, 1, 2), VBFHHSample(1, 1, 2, val_xs=0.0108237, label="VHHTo4B_CV_1_0_C2V_1_0_C3_2_0")),
])
def get_ggf_samples(keys):
"""
......@@ -660,13 +986,21 @@ def get_vbf_samples(keys):
get_vbf_samples([2, (1, 2, 1)])
# -> [VBFHHSample:qqHH_CV_1_C2V_1_kl_2, VBFHHSample:qqHH_CV_1_C2V_2_kl_1]
"""
all_keys = list(ggf_samples.keys())
all_keys = list(vbf_samples.keys())
return [
vbf_samples[key if isinstance(key, tuple) else all_keys[key]]
for key in keys
]
def get_vhh_samples(keys):
"""
Returns a list of :py:class:`VBFHHSample` instances mapped to *keys* in the *vhh_samples*
dict above. A key can be either a sample index or a (CV, C2V, kl) tuple, analogous to
:py:func:`get_vbf_samples`.
"""
all_keys = list(vhh_samples.keys())
return [
vhh_samples[key if isinstance(key, tuple) else all_keys[key]]
for key in keys
]
def create_model(name, skip_ggf=None, skip_vbf=None):
"""
Returns a new :py:class:`HHModel` instance named *name*. Its ggf and vbf sample lists are
......@@ -693,6 +1027,22 @@ def create_model(name, skip_ggf=None, skip_vbf=None):
name=name,
)
def create_VHHmodel(name, skip_vhh=None):
"""
Returns a new :py:class:`VHHModel` instance named *name*. Its vhh sample list is filled with
all samples in *vhh_samples* except those matched by *skip_vhh*, which can contain sample
indices or (CV, C2V, kl) keys.
"""
# get all unskipped vhh keys
vhh_keys = []
for i, key in enumerate(vhh_samples):
if not skip_vhh or not any(skip_key in (i, key) for skip_key in skip_vhh):
vhh_keys.append(key)
print("[INFO] VHH model {} uses sample keys {}".format(name, vhh_keys))
# create and return the model
return VHHModel(
vhh_sample_list=get_vhh_samples(vhh_keys),
name=name,
)
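# usage sketch (model name made up for illustration):
#   model_vhh_all = create_VHHmodel("model_vhh_all")
# note that with exactly six samples defined in vhh_samples above, passing a non-empty skip_vhh
# would leave fewer than six samples and fail the check in VHHModel.check_validity_vhh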
# some named, default models
model_all = create_model("model_all")
......@@ -701,7 +1051,8 @@ model_bbtautau = create_model("model_bbtautau", skip_ggf=[(0, 1)], skip_vbf=[(0.
# model used for the combination
model_comb = create_model("model_comb", skip_ggf=[(0, 1)], skip_vbf=[(0.5, 1, 1)])
# for legacy purposes, reference to the default model (now built from the VHH samples)
model_default = create_model("model_default", skip_ggf=[(0, 1)], skip_vbf=[(0.5, 1, 1)])
##model_default = create_model("model_default", skip_ggf=[(0, 1)], skip_vbf=[(0.5, 1, 1)])
model_default = create_VHHmodel("model_default")
# ggf test models
model_no_ggf_kl0 = create_model("model_no_ggf_kl0", skip_ggf=[(0, 1)], skip_vbf=[(1, 0, 1)])
......@@ -936,10 +1287,12 @@ def create_hh_xsec_func(ggf_formula=None, vbf_formula=None):
#: Default ggF cross section getter using the formula of the *model_default* model.
get_ggf_xsec = create_ggf_xsec_func(model_default.ggf_formula)
##get_ggf_xsec = create_ggf_xsec_func(model_default.ggf_formula)
#: Default VBF cross section getter using the formula of the *model_default* model.
get_vbf_xsec = create_vbf_xsec_func(model_default.vbf_formula)
##get_vbf_xsec = create_vbf_xsec_func(model_default.vbf_formula)
#: Default combined cross section getter using the formulas of the *model_default* model.
get_hh_xsec = create_hh_xsec_func(model_default.ggf_formula, model_default.vbf_formula)
##get_hh_xsec = create_hh_xsec_func(model_default.ggf_formula, model_default.vbf_formula)
......@@ -874,8 +874,8 @@ class ParameterScanTask(AnalysisTask):
class POITask(DatacardTask, ParameterValuesTask):
r_pois = ("r", "r_qqhh", "r_gghh")
k_pois = ("kl", "kt", "CV", "C2V")
r_pois = ("r",)
k_pois = ("kl", "CV", "C2V")
all_pois = r_pois + k_pois
pois = law.CSVParameter(
......