Compare revisions

Showing 4690 additions and 53 deletions.
{
    "best_resolution": {
        "cat_filter": [
            ["sigma_m_over_m_smeared_decorr", "<", 0.0105],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "medium_resolution": {
        "cat_filter": [
            ["sigma_m_over_m_smeared_decorr", ">", 0.0105],
            ["sigma_m_over_m_smeared_decorr", "<", 0.0130],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "worst_resolution": {
        "cat_filter": [
            ["sigma_m_over_m_smeared_decorr", ">", 0.0130],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    }
}
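Each cat_filter entry above is a [variable, operator, value] triple, and the entries of a flat list are combined with a logical AND (the generator script later in this diff also emits an outer OR-of-AND form for absolute-value variables). As a minimal sketch of how such a flat filter could be applied to a merged parquet file — the helper and file names here are illustrative assumptions, not HiggsDNA's actual selection code:

    import json
    import operator
    import awkward as ak

    OPS = {"<": operator.lt, "<=": operator.le, ">": operator.gt,
           ">=": operator.ge, "==": operator.eq}

    def apply_cat_filter(events, cat_filter):
        # Build a boolean mask that is True where every cut passes (logical AND).
        mask = None
        for var, op, value in cat_filter:
            cut = OPS[op](events[var], value)
            mask = cut if mask is None else mask & cut
        return mask

    # Hypothetical usage: file and key names are placeholders.
    events = ak.from_parquet("merged.parquet")
    cats = json.load(open("resolution_categories.json"))
    best = events[apply_cat_filter(events, cats["best_resolution"]["cat_filter"])]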
{
    "NOMINAL": "nominal",
    "ScaleEBUp": "Et_dependent_ScaleEB_up",
    "ScaleEBDown": "Et_dependent_ScaleEB_down",
    "ScaleEEUp": "Et_dependent_ScaleEE_up",
    "ScaleEEDown": "Et_dependent_ScaleEE_down",
    "SmearingUp": "Et_dependent_Smearing_up",
    "SmearingDown": "Et_dependent_Smearing_down",
    "MaterialUp": "Material_up",
    "MaterialDown": "Material_down",
    "FNUFUp": "FNUF_up",
    "FNUFDown": "FNUF_down",
    "energyErrShiftUp": "energyErrShift_up",
    "energyErrShiftDown": "energyErrShift_down"
}
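This file maps the systematic labels used downstream to the names of the corresponding variations in the ntuples. How it is consumed is not shown in this diff; as a minimal sketch under the assumption of one merged parquet directory per variation (file and directory names are illustrative):

    import json

    with open("syst_map.json") as f:
        syst_map = json.load(f)

    for label, variation in syst_map.items():
        # e.g. "ScaleEBUp" -> "Et_dependent_ScaleEB_up"
        path = f"merged/{variation}/NOTAG_merged.parquet"
        print(f"{label}: would read {path}")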
{
    "EBEB_highR9highR9": {
        "cat_filter": [
            ["lead_isScEtaEB", "==", true],
            ["lead_r9", ">=", 0.85],
            ["sublead_isScEtaEB", "==", true],
            ["sublead_r9", ">=", 0.85],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EBEB_highR9lowR9": {
        "cat_filter": [
            ["lead_isScEtaEB", "==", true],
            ["lead_r9", ">=", 0.85],
            ["sublead_isScEtaEB", "==", true],
            ["sublead_r9", "<", 0.85],
            ["sublead_r9", ">=", 0.5],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EBEB_lowR9highR9": {
        "cat_filter": [
            ["lead_isScEtaEB", "==", true],
            ["lead_r9", "<", 0.85],
            ["lead_r9", ">=", 0.5],
            ["sublead_isScEtaEB", "==", true],
            ["sublead_r9", ">=", 0.85],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EBEE_highR9highR9": {
        "cat_filter": [
            ["lead_isScEtaEB", "==", true],
            ["lead_r9", ">=", 0.85],
            ["sublead_isScEtaEE", "==", true],
            ["sublead_r9", ">=", 0.9],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EBEE_highR9lowR9": {
        "cat_filter": [
            ["lead_isScEtaEB", "==", true],
            ["lead_r9", ">=", 0.85],
            ["sublead_isScEtaEE", "==", true],
            ["sublead_r9", "<", 0.9],
            ["sublead_r9", ">=", 0.8],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EBEE_lowR9highR9": {
        "cat_filter": [
            ["lead_isScEtaEB", "==", true],
            ["lead_r9", "<", 0.85],
            ["lead_r9", ">=", 0.5],
            ["sublead_isScEtaEE", "==", true],
            ["sublead_r9", ">=", 0.9],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EEEB_highR9highR9": {
        "cat_filter": [
            ["lead_isScEtaEE", "==", true],
            ["lead_r9", ">=", 0.9],
            ["sublead_isScEtaEB", "==", true],
            ["sublead_r9", ">=", 0.85],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EEEB_highR9lowR9": {
        "cat_filter": [
            ["lead_isScEtaEE", "==", true],
            ["lead_r9", ">=", 0.9],
            ["sublead_isScEtaEB", "==", true],
            ["sublead_r9", "<", 0.85],
            ["sublead_r9", ">=", 0.5],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EEEB_lowR9highR9": {
        "cat_filter": [
            ["lead_isScEtaEE", "==", true],
            ["lead_r9", "<", 0.9],
            ["lead_r9", ">=", 0.8],
            ["sublead_isScEtaEB", "==", true],
            ["sublead_r9", ">=", 0.85],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    },
    "EEEE_incl": {
        "cat_filter": [
            ["lead_isScEtaEE", "==", true],
            ["sublead_isScEtaEE", "==", true],
            ["lead_mvaID", ">", 0.25],
            ["sublead_mvaID", ">", 0.25]
        ]
    }
}
{
    "NOMINAL": "nominal",
    "ScaleUp": "Scale_up",
    "ScaleDown": "Scale_down",
    "SmearingUp": "Smearing_up",
    "SmearingDown": "Smearing_down"
}
import argparse
import json


def generate_categories(recoVar, binName, boundaries, isData=False):
    lead_mvaId_string = "lead_mvaID"
    sublead_mvaId_string = "sublead_mvaID"
    sigma_m_over_m_string = "sigma_m_over_m_corr_smeared_decorr"
    if isData:
        sigma_m_over_m_string = "sigma_m_over_m_smeared_decorr"
    categories = {}
    mass_resolution_categories = ['cat0', 'cat1', 'cat2']
    mass_resolution_thresholds = [0.010, 0.014]
    absolute_value_vars = ["rapidity", "first_jet_y"]
    for i in range(len(boundaries) - 1):
        for j, mass_resolution_cat in enumerate(mass_resolution_categories):
            if binName == "":
                binName_str = recoVar
            else:
                binName_str = binName
            category_name = f"RECO_{binName_str}_{str(boundaries[i]).replace('.', 'p')}_{str(boundaries[i + 1]).replace('.', 'p')}_{mass_resolution_cat}"
            if recoVar in absolute_value_vars:
                # Parquet-style handling of logical AND and OR:
                # the outer list is always OR, while the inner lists are AND conditions.
                if boundaries[i] != 0:
                    category_filters = [
                        [
                            [lead_mvaId_string, ">", 0.25],
                            [sublead_mvaId_string, ">", 0.25],
                            [recoVar, ">=", boundaries[i]],
                            [recoVar, "<", boundaries[i + 1]]
                        ],
                        [
                            [lead_mvaId_string, ">", 0.25],
                            [sublead_mvaId_string, ">", 0.25],
                            [recoVar, "<=", -1 * boundaries[i]],
                            [recoVar, ">", -1 * boundaries[i + 1]]
                        ]
                    ]
                else:
                    category_filters = [
                        [
                            [lead_mvaId_string, ">", 0.25],
                            [sublead_mvaId_string, ">", 0.25],
                            [recoVar, ">=", boundaries[i]],
                            [recoVar, "<", boundaries[i + 1]]
                        ],
                        [
                            [lead_mvaId_string, ">", 0.25],
                            [sublead_mvaId_string, ">", 0.25],
                            [recoVar, "<", boundaries[i]],
                            [recoVar, ">=", -1 * boundaries[i + 1]]
                        ]
                    ]
                for k, _ in enumerate(category_filters):
                    if j == 0:
                        category_filters[k].append([sigma_m_over_m_string, "<", mass_resolution_thresholds[j]])
                    elif j == len(mass_resolution_categories) - 1:
                        category_filters[k].append([sigma_m_over_m_string, ">=", mass_resolution_thresholds[j - 1]])
                    else:
                        category_filters[k].append([sigma_m_over_m_string, ">=", mass_resolution_thresholds[j - 1]])
                        category_filters[k].append([sigma_m_over_m_string, "<", mass_resolution_thresholds[j]])
            else:
                category_filters = [
                    [lead_mvaId_string, ">", 0.25],
                    [sublead_mvaId_string, ">", 0.25],
                    [recoVar, ">=", boundaries[i]],
                    [recoVar, "<", boundaries[i + 1]]
                ]
                if j == 0:
                    category_filters.append([sigma_m_over_m_string, "<", mass_resolution_thresholds[j]])
                elif j == len(mass_resolution_categories) - 1:
                    category_filters.append([sigma_m_over_m_string, ">=", mass_resolution_thresholds[j - 1]])
                else:
                    category_filters.append([sigma_m_over_m_string, ">=", mass_resolution_thresholds[j - 1]])
                    category_filters.append([sigma_m_over_m_string, "<", mass_resolution_thresholds[j]])
            categories[category_name] = {
                "cat_filter": category_filters
            }
    return categories


def save_to_json(output_file, categories):
    with open(output_file, 'w') as outfile:
        json.dump(categories, outfile, indent=4)


def main():
    parser = argparse.ArgumentParser(description='Generate a JSON file of categories for the given reco variable and bin boundaries.')
    parser.add_argument('output_file', help='Output file name and location')
    parser.add_argument('recoVar', type=str, default="pt", help='Reco variable')
    parser.add_argument('boundaries', nargs='+', type=float, help='List of bin boundaries')
    parser.add_argument('--isData', action="store_true", default=False, help="Add this flag when running over data; it changes the name of the sigma_m/m variable that is read.")
    parser.add_argument('--binName', dest='binName', type=str, required=False, default="")
    args = parser.parse_args()
    print(args.isData)
    categories = generate_categories(args.recoVar, args.binName, args.boundaries, args.isData)
    save_to_json(args.output_file, categories)


if __name__ == "__main__":
    main()
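As a usage sketch (the script's file name is not visible in this diff, so generate_categories.py is a placeholder), the following would produce 8 × 3 = 24 RECO categories for an 8-bin pT spectrum:

    python generate_categories.py cats_pt.json pt 0 15 30 45 80 120 200 350 10000 --binName PTH

Each kinematic bin is split into cat0/cat1/cat2 by the hard-coded sigma_m/m thresholds of 0.010 and 0.014; passing --isData switches the sigma_m/m column from sigma_m_over_m_corr_smeared_decorr to sigma_m_over_m_smeared_decorr.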
{
    "GenPTH": {
        "(0, 15, 'in', '')": 10,
        "(15, 30, 'in', '')": 11,
        "(30, 45, 'in', '')": 12,
        "(45, 80, 'in', '')": 13,
        "(80, 120, 'in', '')": 14,
        "(120, 200, 'in', '')": 15,
        "(200, 350, 'in', '')": 16,
        "(350, 10000, 'in', '')": 17,
        "(0, 10000, 'out', '')": 18
    }
}
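The keys of this GenPTH map appear to be stringified (low, high, 'in'/'out', suffix) tuples mapped to bin indices; how the 'in'/'out' flag is consumed is not shown in this diff. A minimal sketch for resolving the bin index of a generated Higgs pT under that assumption (file name illustrative):

    import ast
    import json

    with open("genpth_bins.json") as f:
        bin_map = json.load(f)["GenPTH"]

    def genpth_bin_index(pt):
        # Parse each stringified tuple key and test the 'in'-acceptance bins.
        for key, index in bin_map.items():
            low, high, in_out, _ = ast.literal_eval(key)
            if in_out == 'in' and low <= pt < high:
                return index
        # Fall through to the out-of-acceptance bin (index 18 above).
        return next(i for k, i in bin_map.items() if ast.literal_eval(k)[2] == 'out')

    print(genpth_bin_index(95.0))  # -> 14, the (80, 120, 'in', '') bin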
import glob

import awkward as ak
import pyarrow.parquet as pq


def Get_WeightSum_Btag(source_paths, logger):
    # list of all b-tagging systematic variations
    bTag_sys_variation = ['lfstats1', 'hfstats2', 'jes', 'cferr2', 'lf', 'hf', 'lfstats2', 'hfstats1', 'cferr1']
    sum_weight_central_arr, sum_weight_central_wo_bTagSF_arr = [], []
    sum_weight_bTagSF_sys_arr = []
    flag_bWeight_sys_array = []
    for source_path in source_paths:
        # check whether the systematic variations are stored by accessing one field of the first parquet file
        dataset_check_fields = ak.from_parquet(glob.glob("%s/*.parquet" % source_path)[0])
        flag_bWeight_sys = "weight_bTagSF_sys_jesDown" in dataset_check_fields.fields
        del dataset_check_fields
        if flag_bWeight_sys:
            logger.info(
                f"Attempting to extract the sum of central weights and b-tag systematic weights from the metadata of the files to be merged from {source_path}"
            )
        else:
            logger.info(
                "Skipping the renormalisation of b-tagging systematic weights. Please check whether you have stored the weights for the b-tag systematic variations. Don't worry if you are not evaluating b-tagging systematics for now."
            )
        source_files = glob.glob("%s/*.parquet" % source_path)
        sum_weight_central, sum_weight_central_wo_bTagSF = 0, 0
        sum_weight_bTagSF_sys_dct = {}
        if flag_bWeight_sys:
            # dictionary to store the up and down variations together
            for sys_name in bTag_sys_variation:
                sum_weight_bTagSF_sys_dct["sum_weight_bTagSF_" + sys_name + "Up"] = 0
                sum_weight_bTagSF_sys_dct["sum_weight_bTagSF_" + sys_name + "Down"] = 0
        for f in source_files:
            metadata = pq.read_table(f).schema.metadata
            try:
                # read the sum of the weights without any systematic variation from the metadata
                sum_weight_central += float(metadata[b'sum_weight_central'])
                sum_weight_central_wo_bTagSF += float(metadata[b'sum_weight_central_wo_bTagSF'])
            except (KeyError, TypeError):
                logger.info(
                    "Skipping the renormalisation of weights from b-tagging systematics. Please check whether you have stored the sum of the weights in the metadata with the proper naming, e.g. sum_weight_central, sum_weight_central_wo_bTagSF."
                )
                # set the sums of weights before and after the b-tag SF to 1 so that the ratio is one and merge_parquet.py will not apply any renormalisation
                sum_weight_central, sum_weight_central_wo_bTagSF = 1.0, 1.0
            if flag_bWeight_sys:
                for sys_name in bTag_sys_variation:
                    try:
                        # read the sum of the weights for each systematic variation from the metadata
                        sum_weight_bTagSF_sys_dct["sum_weight_bTagSF_" + sys_name + "Up"] += float(metadata[bytes('sum_weight_bTagSF_sys_' + sys_name + 'Up', encoding='utf8')])
                        sum_weight_bTagSF_sys_dct["sum_weight_bTagSF_" + sys_name + "Down"] += float(metadata[bytes('sum_weight_bTagSF_sys_' + sys_name + 'Down', encoding='utf8')])
                    except (KeyError, TypeError):
                        logger.info(
                            "Skipping the renormalisation of weights from b-tagging systematics. Please check whether you have stored the sum of the weights after applying the b-tag systematic weights in the metadata with the proper naming, e.g. sum_weight_bTagSF_jesUp, sum_weight_bTagSF_jesDown."
                        )
                        flag_bWeight_sys = False
                        break
        sum_weight_central_arr.append(sum_weight_central)
        sum_weight_central_wo_bTagSF_arr.append(sum_weight_central_wo_bTagSF)
        flag_bWeight_sys_array.append(flag_bWeight_sys)
        sum_weight_bTagSF_sys_arr.append(sum_weight_bTagSF_sys_dct)
        logger.info(
            "Successfully extracted the sum of weights with and without b-tag SFs."
        )
        if flag_bWeight_sys:
            logger.info(
                "Successfully extracted the sum of systematic weights with and without b-tag SFs."
            )
    IsBtagNorm_sys_arr, WeightSum_preBTag_arr, WeightSum_postBTag_arr, dir_WeightSum_postBTag_sys_arr = flag_bWeight_sys_array, sum_weight_central_wo_bTagSF_arr, sum_weight_central_arr, sum_weight_bTagSF_sys_arr
    return IsBtagNorm_sys_arr, WeightSum_preBTag_arr, WeightSum_postBTag_arr, dir_WeightSum_postBTag_sys_arr


def Renormalize_BTag_Weights(dataset, target_path, cat, WeightSum_preBTag, WeightSum_postBTag, WeightSum_postBTag_sys, IsBtagNorm_sys, logger):
    bTag_sys_variation = ['lfstats1', 'hfstats2', 'jes', 'cferr2', 'lf', 'hf', 'lfstats2', 'hfstats1', 'cferr1']
    logger.info(
        f"Attempting to renormalise the weights with respect to no b-tag SF from {target_path}{cat}_merged.parquet"
    )
    # modify the existing weight column
    if WeightSum_preBTag != 0 and WeightSum_postBTag != 0:
        dataset['weight'] = dataset['weight'] * (WeightSum_preBTag / WeightSum_postBTag)
        logger.info(
            f"Successfully renormalised weights with respect to no b-tag SF from {target_path}{cat}_merged.parquet"
        )
    else:
        logger.info(
            f"Skipping weight renormalisation with respect to no b-tag SF from {target_path}{cat}_merged.parquet"
        )
    if IsBtagNorm_sys:
        for sys_name in bTag_sys_variation:
            dataset['weight_bTagSF_sys_' + sys_name + 'Up'] = dataset['weight_bTagSF_sys_' + sys_name + 'Up'] * (WeightSum_preBTag / WeightSum_postBTag_sys["sum_weight_bTagSF_" + sys_name + "Up"])
            dataset['weight_bTagSF_sys_' + sys_name + 'Down'] = dataset['weight_bTagSF_sys_' + sys_name + 'Down'] * (WeightSum_preBTag / WeightSum_postBTag_sys["sum_weight_bTagSF_" + sys_name + "Down"])
        logger.info(
            f"Successfully renormalised systematic weights with respect to no b-tag SF from {target_path}{cat}_merged.parquet"
        )
    else:
        logger.info(
            f"Skipping systematic weight renormalisation with respect to no b-tag SF from {target_path}{cat}_merged.parquet"
        )
    return dataset
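A minimal sketch of how these two helpers could be chained by a merging step, assuming they are in scope — the paths, category name, and logger setup are illustrative, not taken from this diff:

    import logging

    import awkward as ak

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Hypothetical input: one source directory of unmerged parquet files.
    source_paths = ["out/ttH_M125/nominal"]
    flags, pre, post, post_sys = Get_WeightSum_Btag(source_paths, logger)

    dataset = ak.from_parquet("out/ttH_M125/nominal/0.parquet")
    dataset = Renormalize_BTag_Weights(
        dataset, "merged/ttH_M125/", "NOTAG",
        pre[0], post[0], post_sys[0], flags[0], logger,
    )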