Commit 6c90fea1 authored by Marcel Rieger's avatar Marcel Rieger
Browse files

Minor updates to scripts.

parent 7c434b75
......@@ -185,8 +185,8 @@ class DatacardRenamer(object):
# yield the context and handle errors
try:
with self._tfile_cache:
with manipulate_datacard(self.datacard) as content:
yield content
with manipulate_datacard(self.datacard) as blocks:
yield blocks
# add all output objects to the tfile cache for writing
for f in self._tobj_output_cache:
......@@ -206,6 +206,7 @@ class DatacardRenamer(object):
class ShapeLine(object):
@classmethod
def parse(cls, line):
parts = line.strip().split()
......
......@@ -78,11 +78,11 @@ def add_parameter(datacard, param_name, param_type, param_spec=None, directory=N
datacard = bundle_datacard(datacard, directory, skip_shapes=skip_shapes)
# start adding
with manipulate_datacard(datacard) as content:
with manipulate_datacard(datacard) as blocks:
# check if no parameter with that name already exists
existing_names = []
for key in ["parameters", "groups", "auto_mc_stats"]:
for line in content.get(key):
for line in blocks.get(key):
parts = line.strip().split()
if len(parts) >= 2:
existing_names.append(parts[1])
......@@ -99,16 +99,16 @@ def add_parameter(datacard, param_name, param_type, param_spec=None, directory=N
else:
key = "parameters"
logger.info("adding new {} parameter line '{}'".format(key, param_line))
content[key].append(param_line)
blocks[key].append(param_line)
else:
# the parameter is columnar, so get a list of bins and processes in order of appearance
if not content.get("rates"):
if not blocks.get("rates"):
raise Exception("adding a columnar parameter requires the datacard to have "
"process rates")
bin_names = content["rates"][0].split()[1:]
process_names = content["rates"][1].split()[1:]
bin_names = blocks["rates"][0].split()[1:]
process_names = blocks["rates"][1].split()[1:]
if len(bin_names) != len(process_names):
raise Exception("number of bins ({}) and processes ({}) not matching in datacard "
"rates".format(len(bin_names), len(process_names)))
......@@ -138,11 +138,11 @@ def add_parameter(datacard, param_name, param_type, param_spec=None, directory=N
# add the new line
param_line = " ".join([param_name, param_type] + parts)
logger.info("adding new parameter line '{}'".format(param_line))
content["parameters"].append(param_line)
blocks["parameters"].append(param_line)
# increase kmax in counts
if is_columnar:
update_datacard_count(content, "kmax", 1, diff=True, logger=logger)
update_datacard_count(blocks, "kmax", 1, diff=True, logger=logger)
if __name__ == "__main__":
......
......@@ -65,16 +65,16 @@ def flip_parameters(datacard, patterns, directory=None, skip_shapes=False, mass=
renamer = DatacardRenamer(datacard, directory=directory, skip_shapes=skip_shapes, logger=logger)
# start renaming
with renamer.start() as content:
with renamer.start() as blocks:
# get the lists of bin and process names from the "rates" block
bin_names = content["rates"][0].split()[1:]
process_names = content["rates"][1].split()[1:]
bin_names = blocks["rates"][0].split()[1:]
process_names = blocks["rates"][1].split()[1:]
if len(bin_names) != len(process_names):
raise Exception("number of bins ({}) and processes ({}) not matching in datacard "
"rates".format(len(bin_names), len(process_names)))
# iterate through lines in the "parameters" block
for i, param_line in enumerate(content.get("parameters", [])):
for i, param_line in enumerate(blocks.get("parameters", [])):
param_line = param_line.split()
if len(param_line) < 2:
continue
......@@ -117,12 +117,12 @@ def flip_parameters(datacard, patterns, directory=None, skip_shapes=False, mass=
"to {}".format(param_type, param_name, bin_name, process_name, f))
elif multi_match(param_type, "shape*"):
if f == "-" or skip_shapes or not content.get("shapes"):
if f == "-" or skip_shapes or not blocks.get("shapes"):
continue
# extract shape lines that have a systematic pattern and sort them so
# that most specific ones (i.e. without wildcards) come first
shape_lines = [ShapeLine(l, k) for k, l in enumerate(content["shapes"])]
shape_lines = [ShapeLine(l, k) for k, l in enumerate(blocks["shapes"])]
shape_lines = [sl for sl in shape_lines if sl.syst_pattern]
shape_lines.sort(key=lambda sl: sl.sorting_weight)
......@@ -173,7 +173,7 @@ def flip_parameters(datacard, patterns, directory=None, skip_shapes=False, mass=
# replace the line
param_line = " ".join([param_name, param_type] + new_effects)
logger.info("adding new parameter line '{}'".format(param_line))
content["parameters"][i] = param_line
blocks["parameters"][i] = param_line
if __name__ == "__main__":
......
......@@ -102,15 +102,15 @@ def merge_parameters(datacard, new_name, patterns, directory=None, skip_shapes=F
datacard = bundle_datacard(datacard, directory, skip_shapes=skip_shapes)
# start merging
with manipulate_datacard(datacard) as content:
with manipulate_datacard(datacard) as blocks:
# keep track of the full lines of parameters to be merged as well as their type
removed_param_lines = []
new_type = None
# find parameters to be merged
if content.get("parameters"):
if blocks.get("parameters"):
to_remove = []
for i, param_line in enumerate(content["parameters"]):
for i, param_line in enumerate(blocks["parameters"]):
param_line = param_line.split()
# the name must not exist yet
......@@ -141,9 +141,9 @@ def merge_parameters(datacard, new_name, patterns, directory=None, skip_shapes=F
to_remove.append(i)
# change lines in-place
lines = [line for i, line in enumerate(content["parameters"]) if i not in to_remove]
del content["parameters"][:]
content["parameters"].extend(lines)
lines = [line for i, line in enumerate(blocks["parameters"]) if i not in to_remove]
del blocks["parameters"][:]
blocks["parameters"].extend(lines)
# nothing to do when no parameter was found, this is likely a misconfiguration
if not removed_param_lines:
......@@ -154,10 +154,10 @@ def merge_parameters(datacard, new_name, patterns, directory=None, skip_shapes=F
# when the new type is "shape", verify that shape lines are given and sort them
shape_lines = None
if new_type == "shape":
if content.get("shapes"):
if blocks.get("shapes"):
# prepare shape lines that have a systematic pattern and sort them so that most
# specific ones (i.e. without wildcards) come first
shape_lines = [ShapeLine(line, j) for j, line in enumerate(content["shapes"])]
shape_lines = [ShapeLine(line, j) for j, line in enumerate(blocks["shapes"])]
shape_lines = [shape_line for shape_line in shape_lines if shape_line.syst_pattern]
shape_lines.sort(key=lambda shape_line: shape_line.sorting_weight)
......@@ -173,8 +173,8 @@ def merge_parameters(datacard, new_name, patterns, directory=None, skip_shapes=F
n_cols = list(unique_lengths)[0] - 2
# get all bins and processes
bin_names = content["rates"][0].split()[1:]
process_names = content["rates"][1].split()[1:]
bin_names = blocks["rates"][0].split()[1:]
process_names = blocks["rates"][1].split()[1:]
# quick check if all lists have the same lengths
if not (len(bin_names) == len(process_names) == n_cols):
......@@ -459,14 +459,14 @@ def merge_parameters(datacard, new_name, patterns, directory=None, skip_shapes=F
bin_name, process_name, merged_effect))
# add the merged line
content["parameters"].append(" ".join([new_name, new_type] + merged_effects))
blocks["parameters"].append(" ".join([new_name, new_type] + merged_effects))
logger.debug("added merged parameter line for bin {} and process {}".format(bin_name,
process_name))
# decrease kmax in counts
if removed_param_lines:
# decrement kmax
update_datacard_count(content, "kmax", 1 - len(removed_param_lines), diff=True,
update_datacard_count(blocks, "kmax", 1 - len(removed_param_lines), diff=True,
logger=logger)
......
......@@ -66,18 +66,18 @@ def remove_bin_process_pairs(datacard, patterns, directory=None, skip_shapes=Fal
datacard = bundle_datacard(datacard, directory, skip_shapes=skip_shapes)
# start removing
with manipulate_datacard(datacard) as content:
with manipulate_datacard(datacard) as blocks:
# keep track of which bins and processes were fully removed
fully_removed_bin_names = set()
fully_removed_process_names = set()
# remove from process rates and remember column indices for removal in parameters
removed_columns = []
if content.get("rates"):
bin_names = content["rates"][0].split()[1:]
process_names = content["rates"][1].split()[1:]
process_ids = content["rates"][2].split()[1:]
rates = content["rates"][3].split()[1:]
if blocks.get("rates"):
bin_names = blocks["rates"][0].split()[1:]
process_names = blocks["rates"][1].split()[1:]
process_ids = blocks["rates"][2].split()[1:]
rates = blocks["rates"][3].split()[1:]
# quick check if all lists have the same lengths
if not (len(bin_names) == len(process_names) == len(process_ids) == len(rates)):
......@@ -113,30 +113,30 @@ def remove_bin_process_pairs(datacard, patterns, directory=None, skip_shapes=Fal
fully_removed_process_names = set(process_names) - set(new_process_names)
# add back reduced lines
content["rates"][0] = "bin " + " ".join(new_bin_names)
content["rates"][1] = "process " + " ".join(new_process_names)
content["rates"][2] = "process " + " ".join(new_process_ids)
content["rates"][3] = "rate " + " ".join(new_rates)
blocks["rates"][0] = "bin " + " ".join(new_bin_names)
blocks["rates"][1] = "process " + " ".join(new_process_names)
blocks["rates"][2] = "process " + " ".join(new_process_ids)
blocks["rates"][3] = "rate " + " ".join(new_rates)
logger.info("removed {} entries from process rates".format(len(removed_columns)))
# decrease imax in counts
if fully_removed_bin_names:
logger.info("removed all occurrences of bin(s) {}".format(
", ".join(fully_removed_bin_names)))
update_datacard_count(content, "imax", -len(fully_removed_bin_names), diff=True,
update_datacard_count(blocks, "imax", -len(fully_removed_bin_names), diff=True,
logger=logger)
# decrease jmax in counts
if fully_removed_process_names:
logger.info("removed all occurrences of processes(s) {}".format(
", ".join(fully_removed_process_names)))
update_datacard_count(content, "jmax", -len(fully_removed_process_names), diff=True,
update_datacard_count(blocks, "jmax", -len(fully_removed_process_names), diff=True,
logger=logger)
# remove fully removed bins from observations
if content.get("observations") and fully_removed_bin_names:
bin_names = content["observations"][0].split()[1:]
observations = content["observations"][1].split()[1:]
if blocks.get("observations") and fully_removed_bin_names:
bin_names = blocks["observations"][0].split()[1:]
observations = blocks["observations"][1].split()[1:]
removed_obs_columns = []
for i, bin_name in enumerate(bin_names):
......@@ -145,12 +145,12 @@ def remove_bin_process_pairs(datacard, patterns, directory=None, skip_shapes=Fal
removed_obs_columns.append(i)
mask = lambda l: [elem for j, elem in enumerate(l) if j not in removed_obs_columns]
content["observations"][0] = "bin " + " ".join(mask(bin_names))
content["observations"][1] = "observation " + " ".join(mask(observations))
blocks["observations"][0] = "bin " + " ".join(mask(bin_names))
blocks["observations"][1] = "observation " + " ".join(mask(observations))
# remove from shape lines
if content.get("shapes"):
shape_lines = [ShapeLine(line, j) for j, line in enumerate(content["shapes"])]
if blocks.get("shapes"):
shape_lines = [ShapeLine(line, j) for j, line in enumerate(blocks["shapes"])]
to_remove = []
for shape_line in shape_lines:
# when both bin and process are wildcards, the shape line is not removed
......@@ -183,14 +183,14 @@ def remove_bin_process_pairs(datacard, patterns, directory=None, skip_shapes=Fal
break
# change lines in-place
lines = [line for j, line in enumerate(content["shapes"]) if j not in to_remove]
del content["shapes"][:]
content["shapes"].extend(lines)
lines = [line for j, line in enumerate(blocks["shapes"]) if j not in to_remove]
del blocks["shapes"][:]
blocks["shapes"].extend(lines)
# remove columns from certain parameters
if content.get("parameters") and removed_columns:
if blocks.get("parameters") and removed_columns:
expr = r"^([^\s]+)\s+({})\s+(.+)$".format("|".join(columnar_parameter_directives))
for i, param_line in enumerate(list(content["parameters"])):
for i, param_line in enumerate(list(blocks["parameters"])):
m = re.match(expr, param_line.strip())
if not m:
continue
......@@ -207,12 +207,12 @@ def remove_bin_process_pairs(datacard, patterns, directory=None, skip_shapes=Fal
logger.debug("remove {} column(s) from parameter {}".format(
len(removed_columns), param_name))
columns = [c for j, c in enumerate(columns) if j not in removed_columns]
content["parameters"][i] = " ".join([param_name, param_type] + columns)
blocks["parameters"][i] = " ".join([param_name, param_type] + columns)
# remove fully removed bins from auto mc stats
if content.get("auto_mc_stats") and fully_removed_bin_names:
if blocks.get("auto_mc_stats") and fully_removed_bin_names:
new_lines = []
for line in content["auto_mc_stats"]:
for line in blocks["auto_mc_stats"]:
bin_name = line.strip().split()[0]
if bin_name not in fully_removed_bin_names:
new_lines.append(line)
......@@ -220,8 +220,8 @@ def remove_bin_process_pairs(datacard, patterns, directory=None, skip_shapes=Fal
logger.info("remove autoMCStats for bin {}".format(bin_name))
# change lines in place
del content["auto_mc_stats"][:]
content["auto_mc_stats"].extend(new_lines)
del blocks["auto_mc_stats"][:]
blocks["auto_mc_stats"].extend(new_lines)
if __name__ == "__main__":
......
......@@ -55,14 +55,14 @@ def remove_parameters(datacard, patterns, directory=None, skip_shapes=False):
datacard = bundle_datacard(datacard, directory, skip_shapes=skip_shapes)
# start removing
with manipulate_datacard(datacard) as content:
with manipulate_datacard(datacard) as blocks:
# keep track of which exact parameters were removed that describe nuisances
removed_nuisance_names = []
# remove from parameters
if content.get("parameters"):
if blocks.get("parameters"):
to_remove = []
for i, param_line in enumerate(content["parameters"]):
for i, param_line in enumerate(blocks["parameters"]):
param_line = param_line.split()
if len(param_line) < 2:
continue
......@@ -74,13 +74,13 @@ def remove_parameters(datacard, patterns, directory=None, skip_shapes=False):
removed_nuisance_names.append(param_name)
# change lines in-place
lines = [line for i, line in enumerate(content["parameters"]) if i not in to_remove]
del content["parameters"][:]
content["parameters"].extend(lines)
lines = [line for i, line in enumerate(blocks["parameters"]) if i not in to_remove]
del blocks["parameters"][:]
blocks["parameters"].extend(lines)
# remove from group listings
if content.get("groups"):
for i, group_line in enumerate(list(content["groups"])):
if blocks.get("groups"):
for i, group_line in enumerate(list(blocks["groups"])):
m = re.match(r"^([^\s]+)\s+group\s+\=\s+(.+)$", group_line.strip())
if not m:
logger.error("invalid group line format: {}".format(group_line))
......@@ -93,26 +93,26 @@ def remove_parameters(datacard, patterns, directory=None, skip_shapes=False):
param_name, group_name))
param_names.remove(param_name)
group_line = "{} group = {}".format(group_name, " ".join(param_names))
content["groups"][i] = group_line
blocks["groups"][i] = group_line
# remove groups themselves
if content.get("groups"):
if blocks.get("groups"):
to_remove = []
for i, group_line in enumerate(content["groups"]):
for i, group_line in enumerate(blocks["groups"]):
group_name = group_line.split()[0]
if multi_match(group_name, patterns):
logger.info("remove group {}".format(group_name))
to_remove.append(i)
# change lines in-place
lines = [line for j, line in enumerate(content["groups"]) if j not in to_remove]
del content["groups"][:]
content["groups"].extend(lines)
lines = [line for j, line in enumerate(blocks["groups"]) if j not in to_remove]
del blocks["groups"][:]
blocks["groups"].extend(lines)
# remove auto mc stats
if content.get("auto_mc_stats"):
if blocks.get("auto_mc_stats"):
new_lines = []
for line in content["auto_mc_stats"]:
for line in blocks["auto_mc_stats"]:
bin_name = line.strip().split()[0]
if bin_name != "*" and multi_match(bin_name, patterns):
logger.info("remove autoMCStats for bin {}".format(bin_name))
......@@ -120,12 +120,12 @@ def remove_parameters(datacard, patterns, directory=None, skip_shapes=False):
new_lines.append(line)
# change lines in-place
del content["auto_mc_stats"][:]
content["auto_mc_stats"].extend(new_lines)
del blocks["auto_mc_stats"][:]
blocks["auto_mc_stats"].extend(new_lines)
# decrease kmax in counts
if removed_nuisance_names:
update_datacard_count(content, "kmax", -len(removed_nuisance_names), diff=True,
update_datacard_count(blocks, "kmax", -len(removed_nuisance_names), diff=True,
logger=logger)
......
......@@ -71,24 +71,24 @@ def rename_parameters(datacard, rules, directory=None, skip_shapes=False, mass="
return old_name, new_name, towner
# start renaming
with renamer.start() as content:
with renamer.start() as blocks:
# rename parameter names in the "parameters" block itself
if content.get("parameters"):
if blocks.get("parameters"):
def sub_fn(match):
old_name, rest = match.groups()
new_name = renamer.translate(old_name)
logger.info("rename parameter {} to {}".format(old_name, new_name))
return " ".join([new_name, rest])
for i, param_line in enumerate(list(content["parameters"])):
for i, param_line in enumerate(list(blocks["parameters"])):
old_name = param_line.split()[0]
if renamer.has_rule(old_name):
expr = r"^({})\s(.*)$".format(old_name)
param_line = re.sub(expr, sub_fn, param_line)
content["parameters"][i] = param_line
blocks["parameters"][i] = param_line
# update them in group listings
if content.get("groups"):
if blocks.get("groups"):
def sub_fn(match):
start, old_name, end = match.groups()
new_name = renamer.translate(old_name)
......@@ -96,29 +96,29 @@ def rename_parameters(datacard, rules, directory=None, skip_shapes=False, mass="
start.split()[0], new_name))
return " ".join([start, new_name, end]).strip()
for i, group_line in enumerate(list(content["groups"])):
for i, group_line in enumerate(list(blocks["groups"])):
for old_name in renamer.rules:
expr = r"^(.+\s+group\s+=.*)\s({})\s(.*)$".format(old_name)
group_line = re.sub(expr, sub_fn, group_line + " ")
content["groups"][i] = group_line
blocks["groups"][i] = group_line
# update group names themselves
if content.get("groups"):
if blocks.get("groups"):
def sub_fn(match):
old_name, rest = match.groups()
new_name = renamer.translate(old_name)
logger.info("rename group {} to {}".format(old_name, new_name))
return " ".join([new_name, rest])
for i, group_line in enumerate(list(content["groups"])):
for i, group_line in enumerate(list(blocks["groups"])):
group_name = group_line.split()[0]
if renamer.has_rule(group_name):
expr = r"^({})\s(.*)$".format(group_name)
group_line = re.sub(expr, sub_fn, group_line)
content["groups"][i] = group_line
blocks["groups"][i] = group_line
# rename shapes
if not skip_shapes and content.get("shapes"):
if not skip_shapes and blocks.get("shapes"):
# determine shape systematic names per (bin, process) pair
shape_syst_names = renamer.get_bin_process_to_systs_mapping()
......@@ -127,7 +127,7 @@ def rename_parameters(datacard, rules, directory=None, skip_shapes=False, mass="
# extract shape lines that have a systematic pattern and sort them so that most specific
# ones (i.e. without wildcards) come first
shape_lines = [ShapeLine(line, j) for j, line in enumerate(content["shapes"])]
shape_lines = [ShapeLine(line, j) for j, line in enumerate(blocks["shapes"])]
shape_lines = [shape_line for shape_line in shape_lines if shape_line.syst_pattern]
shape_lines.sort(key=lambda shape_line: shape_line.sorting_weight)
......
......@@ -82,18 +82,18 @@ def rename_processes(datacard, rules, directory=None, skip_shapes=False, mass="1
return old_name, new_name, new_pattern, towner
# start renaming
with renamer.start() as content:
with renamer.start() as blocks:
# rename names in process rates
if content.get("rates"):
line = content["rates"][1] + " "
if blocks.get("rates"):
line = blocks["rates"][1] + " "
for old_name, new_name in renamer.rules.items():
if (" " + old_name + " ") in line[len("process"):]:
logger.info("rename process {} to {}".format(old_name, new_name))
line = line.replace(" " + old_name + " ", " " + new_name + " ")
content["rates"][1] = line.strip()
blocks["rates"][1] = line.strip()
# rename shapes
if content.get("shapes"):
if blocks.get("shapes"):
# determine shape systematic names per (bin, process) pair
shape_syst_names = renamer.get_bin_process_to_systs_mapping()
......@@ -105,7 +105,7 @@ def rename_processes(datacard, rules, directory=None, skip_shapes=False, mass="1
# extract shape lines and sort them so that most specific ones
# (i.e. without wildcards) come first
shape_lines = [ShapeLine(line, j) for j, line in enumerate(content["shapes"])]
shape_lines = [ShapeLine(line, j) for j, line in enumerate(blocks["shapes"])]
shape_lines.sort(key=lambda shape_line: shape_line.sorting_weight)
# go through shape lines and do the renaming
......@@ -161,8 +161,8 @@ def rename_processes(datacard, rules, directory=None, skip_shapes=False, mass="1
if not process_is_wildcard:
new_shape_line.process = renamer.translate(process_name)
# add the new line back to content
content["shapes"][new_shape_line.i] = str(new_shape_line)
# add the new line back to blocks
blocks["shapes"][new_shape_line.i] = str(new_shape_line)
if __name__ == "__main__":
......
......@@ -25,11 +25,8 @@ from collections import OrderedDict
from dhi.datacard_tools import (
columnar_parameter_directives, bundle_datacard, manipulate_datacard, update_datacard_count,
expand_variables, expand_file_lines, ShapeLine,
)
from dhi.util import (
import_ROOT, TFileCache, real_path, multi_match, create_console_logger, patch_object,
)
from dhi.util import real_path, multi_match, create_console_logger, patch_object
logger = create_console_logger(os.path.splitext(os.path.basename(__file__))[0])
......@@ -77,12 +74,12 @@ def split_parameter(datacard, param_name, specs, ensure_unique=False, ensure_all
datacard = bundle_datacard(datacard, directory, skip_shapes=skip_shapes)
# start splitting
with manipulate_datacard(datacard) as content:
if not content.get("parameters"):
with manipulate_datacard(datacard) as blocks:
if not blocks.get("parameters"):
return
# lookup the parameter line to split
for line_idx, param_line in enumerate(content["parameters"]):
for line_idx, param_line in enumerate(blocks["parameters"]):
param_line = param_line.split()
# none of the new names should exist already
......@@ -96,7 +93,7 @@ def split_parameter(datacard, param_name, specs, ensure_unique=False, ensure_all
# cannot process with less than two line elements
if len(param_line) < 2:
raise Exception("invalid parameter line: ".format(content["parameters"][i]))
raise Exception("invalid parameter line: " + blocks["parameters"][line_idx])
# check the type
param_type = param_line[1]
......@@ -177,20 +174,20 @@ def split_parameter(datacard, param_name, specs, ensure_unique=False, ensure_all
raise Exception(msg)
# remove the old line
lines = [line for i, line in enumerate(content["parameters"]) if i != line_idx]
del content["parameters"][:]
content["parameters"].extend(lines)
lines = [line for i, line in enumerate(blocks["parameters"]) if i != line_idx]
del blocks["parameters"][:]
blocks["parameters"].extend(lines)
logger.info("removed parameter {} with type {} and {} values".format(
param_name, param_type, len(param_values) - param_values.count("-")))
# add the new lines
for new_name, spec_line in zip(specs, spec_lines):
content["parameters"].append(" ".join(spec_line))
blocks["parameters"].append(" ".join(spec_line))
logger.info("added new parameter {} with type {} and {} values".format(
new_name, param_type, len(spec_line) - spec_line.count("-") - 2))
# update kmax in counts
update_datacard_count(content, "kmax", len(spec_lines) - 1, diff=True, logger=logger)
update_datacard_count(blocks, "kmax", len(spec_lines) - 1, diff=True, logger=logger)
if __name__ == "__main__":
......
Supports Markdown
0% or .