Commit 47216ca1 authored by Joschka Birk

Merge branch alfroch-fix-roc-error with refs/heads/master into refs/merge-requests/566/train

parents 83817768 fed9c445
Pipeline #4124571 passed with stages in 15 minutes and 17 seconds
@@ -4,6 +4,7 @@
 ### Latest
+- Fixing uncertainty calculation for the ROC curves [!566](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/566)
 ### [v0.9](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/tags/0.9)
...
@@ -364,7 +364,11 @@ def evaluate_model(
         f"results{results_filename_extension}-rej_per_eff-{epoch}.h5",
         "a",
     ) as h5_file:
-        h5_file.attrs["N_test"] = len(jets)
+        # Put the number of jets per class in the file attrs for the unc calculation
+        for flav_counter, flavour in enumerate(class_labels):
+            h5_file.attrs[f"njets_{flavour}"] = len(
+                truth_internal_labels[truth_internal_labels == flav_counter]
+            )
 
     # Get the rejections, discs and f_* values for the taggers
     tagger_fraction_rej_dict = uet.get_rej_per_frac_dict(
@@ -398,7 +402,11 @@ def evaluate_model(
         f"results{results_filename_extension}-rej_per_fractions-{args.epoch}.h5",
         "a",
     ) as h5_file:
-        h5_file.attrs["N_test"] = len(jets)
+        # Put the number of jets per class in the file attrs for the unc calculation
+        for flav_counter, flavour in enumerate(class_labels):
+            h5_file.attrs[f"njets_{flavour}"] = len(
+                truth_internal_labels[truth_internal_labels == flav_counter]
+            )
 
 
 def evaluate_model_dips(
@@ -641,7 +649,11 @@ def evaluate_model_dips(
         f"results{results_filename_extension}-rej_per_eff-{args.epoch}.h5",
         "a",
     ) as h5_file:
-        h5_file.attrs["N_test"] = len(jets)
+        # Put the number of jets per class in the file attrs for the unc calculation
+        for flav_counter, flavour in enumerate(class_labels):
+            h5_file.attrs[f"njets_{flavour}"] = len(
+                truth_internal_labels[truth_internal_labels == flav_counter]
+            )
 
     # Get the rejections, discs and f_* values for the taggers
     tagger_fraction_rej_dict = uet.get_rej_per_frac_dict(
@@ -674,7 +686,11 @@ def evaluate_model_dips(
         f"results{results_filename_extension}-rej_per_fractions-{args.epoch}.h5",
         "a",
     ) as h5_file:
-        h5_file.attrs["N_test"] = len(jets)
+        # Put the number of jets per class in the file attrs for the unc calculation
+        for flav_counter, flavour in enumerate(class_labels):
+            h5_file.attrs[f"njets_{flavour}"] = len(
+                truth_internal_labels[truth_internal_labels == flav_counter]
+            )
 
     if (
         "calculate_saliency" in eval_params
@@ -939,7 +955,11 @@ def evaluate_model_dl1(
         f"results{results_filename_extension}-rej_per_eff-{args.epoch}.h5",
         "a",
     ) as h5_file:
-        h5_file.attrs["N_test"] = len(jets)
+        # Put the number of jets per class in the file attrs for the unc calculation
+        for flav_counter, flavour in enumerate(class_labels):
+            h5_file.attrs[f"njets_{flavour}"] = len(
+                truth_internal_labels[truth_internal_labels == flav_counter]
+            )
 
     # Get the rejections, discs and f_* values for the taggers
     tagger_fraction_rej_dict = uet.get_rej_per_frac_dict(
@@ -973,7 +993,11 @@ def evaluate_model_dl1(
         f"results{results_filename_extension}-rej_per_fractions-{args.epoch}.h5",
         "a",
     ) as h5_file:
-        h5_file.attrs["N_test"] = len(jets)
+        # Put the number of jets per class in the file attrs for the unc calculation
+        for flav_counter, flavour in enumerate(class_labels):
+            h5_file.attrs[f"njets_{flavour}"] = len(
+                truth_internal_labels[truth_internal_labels == flav_counter]
+            )
 
     if args.shapley:
         logger.info("Explaining feature importance with SHAPley")
...
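The same replacement recurs in all six hunks above: the single global `N_test` attribute gives way to one `njets_<flavour>` attribute per class, so the downstream uncertainty calculation knows how many jets of each flavour entered the rejection. A minimal, self-contained sketch of that bookkeeping; the class labels, label array, file name and epoch below are hypothetical stand-ins, not values from umami:

```python
import h5py
import numpy as np

# Hypothetical stand-ins for umami's class_labels and truth_internal_labels
class_labels = ["bjets", "cjets", "ujets"]
truth_internal_labels = np.array([0, 0, 1, 2, 0, 2, 1, 0])

# Write one attribute per class instead of a single global "N_test"
with h5py.File("results-rej_per_eff-42.h5", "a") as h5_file:
    for flav_counter, flavour in enumerate(class_labels):
        h5_file.attrs[f"njets_{flavour}"] = int(
            np.sum(truth_internal_labels == flav_counter)
        )

# Read the counts back, as the plotting side now does
with h5py.File("results-rej_per_eff-42.h5", "r") as h5_file:
    print(dict(h5_file.attrs))
    # {'njets_bjets': 4, 'njets_cjets': 2, 'njets_ujets': 2}
```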
@@ -224,7 +224,7 @@ def plot_roc(
     same_height_WP: bool = True,
     linestyles: list = None,
     colours: list = None,
-    n_test=None,
+    n_test: list = None,
     reference_ratio: list = None,
     **kwargs,
 ):
@@ -266,8 +266,8 @@ def plot_roc(
         List of linestyles to use for the given models, by default None
     colours : list, optional
         List of linecolors to use for the given models, by default None
-    n_test : [type], optional
-        A list of the same length as class_rejections, with the number of
+    n_test : list, optional
+        A list of the same length as rej_class_list, with the number of
         events used to calculate the background efficiencies.
         We need this to calculate the binomial errors on the background
         rejection, using the formula given by
@@ -362,17 +362,12 @@ def plot_roc(
     if draw_errors is True:
         # Check if n_test is provided in all samples
         if n_test is None:
-            n_test_in_file = ["N_test" in df_results for df_results in df_results_list]
-            if not all(n_test_in_file):
-                logger.error(
-                    "Requested binomialErrors, but not all models have n_test. "
-                    "Will NOT plot rej errors."
-                )
-                draw_errors = False
-        elif isinstance(n_test, (int, float)):
-            n_test = [n_test] * len(df_results_list)
+            logger.error(
+                "Requested binomialErrors, but the number of jets used for "
+                "the rejection calculation was not given! "
+                "Will NOT plot rej errors."
+            )
+            draw_errors = False
         elif isinstance(n_test, list):
             if len(n_test) != len(df_results_list):
...
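The n_test values exist to feed the binomial uncertainty mentioned in the docstring above (the reference to the exact formula is truncated in this view). As a rough sketch, assuming the usual rejection = 1/efficiency relation; the helper umami actually uses may differ in detail:

```python
import numpy as np

def rejection_error(rejection: np.ndarray, n_test: int) -> np.ndarray:
    """Binomial uncertainty on the background rejection.

    With eff = 1 / rejection and n_test background jets,
    sigma_eff = sqrt(eff * (1 - eff) / n_test); propagating through
    rejection = 1 / eff gives sigma_rej = rejection**2 * sigma_eff.
    """
    eff = 1.0 / rejection
    return rejection**2 * np.sqrt(eff * (1.0 - eff) / n_test)

# The error shrinks with more test jets, which is why the per-class
# counts stored during evaluation matter
print(rejection_error(np.array([10.0, 100.0, 1000.0]), n_test=50_000))
```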
@@ -160,6 +160,12 @@ def plot_roc(
         Path to the results directory of the model.
     print_model : bool
         Print the models which are plotted while plotting.
+
+    Raises
+    ------
+    AttributeError
+        If the number of jets per class needed to calculate the
+        rejection uncertainties is not in the rej_per_eff results file.
     """
 
     df_results_list = []
     tagger_list = []
@@ -167,20 +173,11 @@ def plot_roc(
     labels = []
     linestyles = []
     colours = []
+    njets_test = []
 
     # Get the epoch which is to be evaluated
     eval_epoch = int(eval_params["epoch"])
 
-    if (
-        "n_test" not in plot_config["plot_settings"]
-        or plot_config["plot_settings"]["n_test"] is None
-    ):
-        n_test_provided = False
-        plot_config["plot_settings"]["n_test"] = []
-    else:
-        n_test_provided = True
-
     for model_name, model_config in plot_config["models_to_plot"].items():
         if print_model:
             logger.info(f"model: {model_name}")
@@ -210,17 +207,27 @@ def plot_roc(
         colours.append(model_config["colour"])
 
         # n_test is only needed to calculate binomial errors
-        if not n_test_provided and (
+        if (
             "draw_errors" in plot_config["plot_settings"]
-            and plot_config["plot_settings"]["draw_errors"]
+            and plot_config["plot_settings"]["draw_errors"] is False
         ):
-            with h5py.File(
-                eval_file_dir + f"/results-rej_per_eff-{eval_epoch}.h5", "r"
-            ) as h5_file:
-                plot_config["plot_settings"]["n_test"].append(h5_file.attrs["N_test"])
+            njets_test.append(None)
         else:
-            plot_config["plot_settings"]["n_test"] = None
+            try:
+                with h5py.File(
+                    eval_file_dir + f"/results-rej_per_eff-{eval_epoch}.h5", "r"
+                ) as h5_file:
+                    njets_test.append(
+                        h5_file.attrs[f"njets_{model_config['rejection_class']}"]
+                    )
+            except KeyError as error:
+                raise AttributeError(
+                    "You set draw_errors to True, but the number of jets per "
+                    "class needed to calculate the rejections is not in the "
+                    "results file! Please re-run the evaluation to add these "
+                    "values correctly!"
+                ) from error
 
     # Get the right ratio id for correct ratio calculation
     ratio_dict = {}
@@ -246,6 +253,7 @@ def plot_roc(
         ratio_id=ratio_id,
         linestyles=linestyles,
         colours=colours,
+        n_test=njets_test,
         **plot_config["plot_settings"],
     )
...
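Putting the plotting-side hunks together, the per-model read now looks roughly like the sketch below. The model names, the epoch in the file name and the rejection classes are illustrative, and the file is assumed to carry the `njets_<flavour>` attributes written during evaluation:

```python
import h5py

# model -> rejection_class (illustrative mapping)
models_to_plot = {"dips": "cjets", "rnnip": "ujets"}
draw_errors = True
njets_test = []

for model, rejection_class in models_to_plot.items():
    if not draw_errors:
        njets_test.append(None)  # no uncertainty band for this model
        continue
    with h5py.File("results-rej_per_eff-42.h5", "r") as h5_file:
        try:
            njets_test.append(h5_file.attrs[f"njets_{rejection_class}"])
        except KeyError as error:
            raise AttributeError(
                "draw_errors is True, but the per-class jet counts are "
                "missing from the results file. Re-run the evaluation."
            ) from error

# njets_test is then passed as n_test to the ROC plotting function
```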