Skip to content
Snippets Groups Projects

Dev

Merged Marcel Rieger requested to merge dev into master
1 file
+ 436
0
Compare changes
  • Side-by-side
  • Inline
+ 436
0
###################################
# Author : L. Cadamuro (UF)
# Date : 22/04/2020
# Brief : code that implements the HH model in combine
# structure of the code :
# xxHHSample -> defines the interface to the user, that will pass the xs and the coupling setups
# xxHHFormula -> implements the matrix component representation, that calculates the symbolic scalings
# HHModel -> implements the interfaces to combine
###################################
from HiggsAnalysis.CombinedLimit.PhysicsModel import *
from sympy import *
from numpy import matrix
from numpy import linalg
from sympy import Matrix
from collections import OrderedDict
class VBFHHSample:
def __init__(self, val_CV, val_C2V, val_kl, val_xs, label):
self.val_CV = val_CV
self.val_C2V = val_C2V
self.val_kl = val_kl
self.val_xs = val_xs
self.label = label
####################
class GGFHHSample:
def __init__(self, val_kl, val_kt, val_xs, label):
self.val_kl = val_kl
self.val_kt = val_kt
self.val_xs = val_xs
self.label = label
####################
class GGFHHFormula:
def __init__(self, sample_list):
self.sample_list = sample_list
self.build_matrix()
self.calculatecoeffients()
def build_matrix(self):
""" create the matrix M in this object """
# the code will combine as many samples as passed to the input
# into a matrix with 3 columns and Nsamples rows
nrows = len(self.sample_list)
ncols = 3
M_tofill = [[None]*ncols for i in range(nrows)]
for isample, sample in enumerate(self.sample_list):
## implement the 3 scalings - box, triangle, interf
M_tofill[isample][0] = sample.val_kt**4
M_tofill[isample][1] = sample.val_kt**2 * sample.val_kl**2
M_tofill[isample][2] = sample.val_kt**3 * sample.val_kl
# print M_tofill
self.M = Matrix(M_tofill)
def calculatecoeffients(self):
""" create the function sigma and the six coefficients in this object """
try: self.M
except AttributeError: self.build_matrix()
# ##############################################
kl, kt, box, tri, interf = symbols('kl kt box tri interf')
samples_symb = OrderedDict() # order is essential -> OrderedDict
Nsamples = self.M.shape[0] #num rows
for i in range(Nsamples):
sname = 's%i' % i
samples_symb[sname] = Symbol(sname)
### the vector of couplings
c = Matrix([
[kt**4] ,
[kt**2 * kl**2] ,
[kt**3 * kl] ,
])
### the vector of components
v = Matrix([
[box] ,
[tri] ,
[interf],
])
### the vector of samples (i.e. cross sections)
symb_list = [[sam] for sam in samples_symb.values()]
s = Matrix(symb_list)
####
Minv = self.M.pinv()
self.coeffs = c.transpose() * Minv # coeffs * s is the sigma, accessing per component gives each sample scaling
self.sigma = self.coeffs*s
#########################################
class VBFHHFormula:
def __init__(self, sample_list):
self.sample_list = sample_list
self.build_matrix()
self.calculatecoeffients()
def build_matrix(self):
""" create the matrix M in this object """
# the code will combine as many samples as passed to the input
# into a matrix with 6 columns and Nsamples rows
nrows = len(self.sample_list)
ncols = 6
M_tofill = [[None]*ncols for i in range(nrows)]
for isample, sample in enumerate(self.sample_list):
# implement the 3 scalings - box, triangle, interf
M_tofill[isample][0] = sample.val_CV**2 * sample.val_kl**2
M_tofill[isample][1] = sample.val_CV**4
M_tofill[isample][2] = sample.val_C2V**2
M_tofill[isample][3] = sample.val_CV**3 * sample.val_kl
M_tofill[isample][4] = sample.val_CV * sample.val_C2V * sample.val_kl
M_tofill[isample][5] = sample.val_CV**2 * sample.val_C2V
# print M_tofill
self.M = Matrix(M_tofill)
def calculatecoeffients(self):
""" create the function sigma and the six coefficients in this object """
try: self.M
except AttributeError: self.build_matrix()
##############################################
CV, C2V, kl, a, b, c, iab, iac, ibc = symbols('CV C2V kl a b c iab iac ibc')
samples_symb = OrderedDict() # order is essential -> OrderedDict
Nsamples = self.M.shape[0] #num rows
for i in range(Nsamples):
sname = 's%i' % i
samples_symb[sname] = Symbol(sname)
### the vector of couplings
c = Matrix([
[CV**2 * kl**2] ,
[CV**4] ,
[C2V**2] ,
[CV**3 * kl] ,
[CV * C2V * kl] ,
[CV**2 * C2V]
])
### the vector of components
v = Matrix([
[a] ,
[b] ,
[c] ,
[iab] ,
[iac] ,
[ibc]
])
### the vector of samples (i.e. cross sections)
symb_list = [[sam] for sam in samples_symb.values()]
s = Matrix(symb_list)
####
Minv = self.M.pinv()
self.coeffs = c.transpose() * Minv # coeffs * s is the sigma, accessing per component gives each sample scaling
self.sigma = self.coeffs*s
#########################################
class HHModel(PhysicsModel):
""" Models the HH production as linear sum of the input components for VBF (>= 6) and GGF (>= 3) """
def __init__(self, ggf_sample_list, vbf_sample_list, name):
PhysicsModel.__init__(self)
self.name = name
self.check_validity_ggf(ggf_sample_list)
self.check_validity_vbf(vbf_sample_list)
self.ggf_formula = GGFHHFormula(ggf_sample_list)
self.vbf_formula = VBFHHFormula(vbf_sample_list)
self.dump_inputs()
def check_validity_ggf(self, ggf_sample_list):
if len(ggf_sample_list) < 3:
raise RuntimeError("%s : malformed GGF input sample list - expect at least 3 samples" % self.name)
if not isinstance(ggf_sample_list, list) and not isinstance(ggf_sample_list, tuple):
raise RuntimeError("%s : malformed GGF input sample list - expect list or tuple" % self.name)
for s in ggf_sample_list:
if not isinstance(s, GGFHHSample):
raise RuntimeError("%s : malformed GGF input sample list - each element must be a GGFHHSample" % self.name)
def check_validity_vbf(self, vbf_sample_list):
if len(vbf_sample_list) < 6:
raise RuntimeError("%s : malformed VBF input sample list - expect at least 6 samples" % self.name)
if not isinstance(vbf_sample_list, list) and not isinstance(vbf_sample_list, tuple):
raise RuntimeError("%s : malformed VBF input sample list - expect list or tuple" % self.name)
for s in vbf_sample_list:
if not isinstance(s, VBFHHSample):
raise RuntimeError("%s : malformed VBF input sample list - each element must be a VBFHHSample" % self.name)
def dump_inputs(self):
print "[INFO] HH model : " , self.name
print "...... GGF configuration"
for i,s in enumerate(self.ggf_formula.sample_list):
print " {0:<3} ... kl : {1:<3}, kt : {2:<3}, xs : {3:<3.8f} pb, label : {4}".format(i, s.val_kl, s.val_kt, s.val_xs, s.label)
print "...... VBF configuration"
for i,s in enumerate(self.vbf_formula.sample_list):
print " {0:<3} ... CV : {1:<3}, C2V : {2:<3}, kl : {3:<3}, xs : {4:<3.8f} pb, label : {5}".format(i, s.val_CV, s.val_C2V, s.val_kl, s.val_xs, s.label)
def doParametersOfInterest(self):
## the model is built with:
## r x [GGF + VBF]
## GGF = r_GGF x [sum samples(kl, kt)]
## VBF = r_VBF x [sum samples(kl, CV, C2V)]
POIs = "r,r_gghh,r_qqhh,CV,C2V,kl,kt"
self.modelBuilder.doVar("r[1,-20,20]")
self.modelBuilder.doVar("r_gghh[1,-20,20]")
self.modelBuilder.doVar("r_qqhh[1,-20,20]")
self.modelBuilder.doVar("CV[1,-10,10]")
self.modelBuilder.doVar("C2V[1,-10,10]")
self.modelBuilder.doVar("kl[1,-30,30]")
self.modelBuilder.doVar("kt[1,-10,10]")
self.modelBuilder.doSet("POI",POIs)
self.modelBuilder.out.var("r_gghh") .setConstant(True)
self.modelBuilder.out.var("r_qqhh") .setConstant(True)
self.modelBuilder.out.var("CV") .setConstant(True)
self.modelBuilder.out.var("C2V") .setConstant(True)
self.modelBuilder.out.var("kl") .setConstant(True)
self.modelBuilder.out.var("kt") .setConstant(True)
self.create_scalings()
def create_scalings(self):
""" create the functions that scale the >= 6 components of vbf and the >= 3 components of ggf """
self.f_r_vbf_names = [] # the RooFormulae that scale the components (VBF)
self.f_r_ggf_names = [] # the RooFormulae that scale the components (GGF)
def pow_to_mul_string(expr):
""" Convert integer powers in an expression to Muls, like a**2 => a*a. Returns a string """
pows = list(expr.atoms(Pow))
if any(not e.is_Integer for b, e in (i.as_base_exp() for i in pows)):
raise ValueError("A power contains a non-integer exponent")
s = str(expr)
repl = zip(pows, (Mul(*[b]*e,evaluate=False) for b,e in (i.as_base_exp() for i in pows)))
for fr, to in repl:
s = s.replace(str(fr), str(to))
return s
### loop on the GGF scalings
for i, s in enumerate(self.ggf_formula.sample_list):
# f_name = 'f_ggfhhscale_sample_{0}'.format(i)
f_name = 'f_ggfhhscale_sample_{0}'.format(s.label)
f_expr = self.ggf_formula.coeffs[i] # the function that multiplies each sample
# print f_expr
# for ROOFit, this will convert expressions as a**2 to a*a
s_expr = pow_to_mul_string(f_expr)
couplings_in_expr = []
if 'kl' in s_expr: couplings_in_expr.append('kl')
if 'kt' in s_expr: couplings_in_expr.append('kt')
# no constant expressions are expected
if len(couplings_in_expr) == 0:
raise RuntimeError('GGF HH : scaling expression has no coefficients')
for idx, ce in enumerate(couplings_in_expr):
# print '..replacing', ce
symb = '@{}'.format(idx)
s_expr = s_expr.replace(ce, symb)
arglist = ','.join(couplings_in_expr)
exprname = 'expr::{}(\"{}\" , {})'.format(f_name, s_expr, arglist)
# print exprname
self.modelBuilder.factory_(exprname) # the function that scales each VBF sample
f_prod_name_pmode = f_name + '_r_gghh'
prodname_pmode = 'prod::{}(r_gghh,{})'.format(f_prod_name_pmode, f_name)
self.modelBuilder.factory_(prodname_pmode) ## the function that scales this production mode
# self.modelBuilder.out.function(f_prod_name).Print("") ## will just print out the values
f_prod_name = f_prod_name_pmode + '_r'
prodname = 'prod::{}(r,{})'.format(f_prod_name, f_prod_name_pmode)
self.modelBuilder.factory_(prodname) ## the function that scales this production mode
# self.modelBuilder.out.function(f_prod_name).Print("") ## will just print out the values
self.f_r_ggf_names.append(f_prod_name) #bookkeep the scaling that has been created
### loop on the VBF scalings
for i, s in enumerate(self.vbf_formula.sample_list):
# f_name = 'f_vbfhhscale_sample_{0}'.format(i)
f_name = 'f_vbfhhscale_sample_{0}'.format(s.label)
f_expr = self.vbf_formula.coeffs[i] # the function that multiplies each sample
# print f_expr
# for ROOFit, this will convert expressions as a**2 to a*a
s_expr = pow_to_mul_string(f_expr)
couplings_in_expr = []
if 'CV' in s_expr: couplings_in_expr.append('CV')
if 'C2V' in s_expr: couplings_in_expr.append('C2V')
if 'kl' in s_expr: couplings_in_expr.append('kl')
# no constant expressions are expected
if len(couplings_in_expr) == 0:
raise RuntimeError('VBF HH : scaling expression has no coefficients')
for idx, ce in enumerate(couplings_in_expr):
# print '..replacing', ce
symb = '@{}'.format(idx)
s_expr = s_expr.replace(ce, symb)
arglist = ','.join(couplings_in_expr)
exprname = 'expr::{}(\"{}\" , {})'.format(f_name, s_expr, arglist)
# print exprname
self.modelBuilder.factory_(exprname) # the function that scales each VBF sample
f_prod_name_pmode = f_name + '_r_qqhh'
prodname_pmode = 'prod::{}(r_qqhh,{})'.format(f_prod_name_pmode, f_name)
self.modelBuilder.factory_(prodname_pmode) ## the function that scales this production mode
# self.modelBuilder.out.function(f_prod_name_pmode).Print("") ## will just print out the values
f_prod_name = f_prod_name_pmode + '_r'
prodname = 'prod::{}(r,{})'.format(f_prod_name, f_prod_name_pmode)
self.modelBuilder.factory_(prodname) ## the function that scales this production mode
# self.modelBuilder.out.function(f_prod_name).Print("") ## will just print out the values
self.f_r_vbf_names.append(f_prod_name) #bookkeep the scaling that has been created
def getYieldScale(self,bin,process):
## my control to verify for a unique association between process <-> scaling function
try:
self.scalingMap
except AttributeError:
self.scalingMap = {}
if not self.DC.isSignal[process]: return 1
# match the process name in the datacard to the input sample of the calculation
# this is the only point where the two things must be matched
if not process in self.scalingMap:
self.scalingMap[process] = []
imatched_ggf = []
imatched_vbf = []
for isample, sample in enumerate(self.ggf_formula.sample_list):
if process.startswith(sample.label):
# print self.name, ": {:>40} ===> {:>40}".format(process, sample.label)
imatched_ggf.append(isample)
for isample, sample in enumerate(self.vbf_formula.sample_list):
if process.startswith(sample.label):
# print self.name, ": {:>40} ===> {:>40}".format(process, sample.label)
imatched_vbf.append(isample)
## this checks that a process finds a unique scaling
if len(imatched_ggf) + len(imatched_vbf) != 1:
print "[ERROR] : in HH model named", self.name, "there are", len(imatched_ggf), "GGF name matches and", len(imatched_vbf), "VBF name matches"
# raise RuntimeError('HHModel : could not uniquely match the process %s to the expected sample list' % process)
if len(imatched_ggf) == 1:
isample = imatched_ggf[0]
self.scalingMap[process].append((isample, 'GGF'))
return self.f_r_ggf_names[isample]
if len(imatched_vbf) == 1:
isample = imatched_vbf[0]
self.scalingMap[process].append((isample, 'VBF'))
return self.f_r_vbf_names[isample]
return 1 ## for signal processes that are not ggHH or qqHH -> to implement here the single H scaling
# raise RuntimeError('HHModel : fatal error in getYieldScale - this should never happen')
#########################################
# GGF : val_kl, val_kt
GGF_sample_list = [
GGFHHSample(1,1, val_xs = 0.02675, label = 'ggHH_kl_1_kt_1' ),
# GGFHHSample(0,1, val_xs = 0.06007, label = 'ggHH_kl_0_kt_1' ),
GGFHHSample(5,1, val_xs = 0.07903, label = 'ggHH_kl_5_kt_1' ),
GGFHHSample(2.45,1, val_xs = 0.01133, label = 'ggHH_kl_2p45_kt_1' ),
]
# VBF : val_CV, val_C2V, val_kl
VBF_sample_list = [
VBFHHSample(1,1,1, val_xs = 0.00054/(0.3364), label = 'qqHH_CV_1_C2V_1_kl_1' ),
VBFHHSample(1,2,1, val_xs = 0.00472/(0.3364), label = 'qqHH_CV_1_C2V_2_kl_1' ),
VBFHHSample(1,1,2, val_xs = 0.00044/(0.3364), label = 'qqHH_CV_1_C2V_1_kl_2' ),
VBFHHSample(1,1,0, val_xs = 0.00145/(0.3364), label = 'qqHH_CV_1_C2V_1_kl_0' ),
VBFHHSample(0.5,1,1, val_xs = 0.00353/(0.3364), label = 'qqHH_CV_0p5_C2V_1_kl_1'),
VBFHHSample(1.5,1,1, val_xs = 0.02149/(0.3364), label = 'qqHH_CV_1p5_C2V_1_kl_1'),
]
HHdefault = HHModel(
ggf_sample_list = GGF_sample_list,
vbf_sample_list = VBF_sample_list,
name = 'HHdefault'
)
# f = GGFHHFormula(GGF_sample_list)
# f = VBFHHFormula(VBF_sample_list)
Loading