Commit 01e27ae5 authored by Philipp Gadow

Merge branch mguth-pylint with refs/heads/master into refs/merge-requests/524/train

parents 82a6c376 cecb86aa
Pipeline #3869806 passed with stages in 26 minutes and 37 seconds
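
The changes below apply pylint-driven naming fixes across the evaluation and metrics code: functions such as EvaluateModel, EvaluateModelDips and EvaluateModelDL1 become evaluate_model, evaluate_model_dips and evaluate_model_dl1, and variables such as nJets, X_test and Y_test become n_jets, x_test and y_test, following the PEP 8 snake_case convention that pylint's invalid-name (C0103) check enforces. A minimal sketch of the pattern (the function bodies are invented for illustration; only the naming mirrors the diff):

# Illustrative only: the camelCase -> snake_case renames enforced by pylint's
# invalid-name (C0103) check; the bodies below are made up for the example.

# Before: the function name, the argument and the local variable all trigger C0103.
def EvaluateModel(nJets: int) -> str:
    selectedJets = min(nJets, 1000)
    return f"evaluating {selectedJets} jets"

# After: snake_case names satisfy pylint's default naming rules.
def evaluate_model(n_jets: int) -> str:
    selected_jets = min(n_jets, 1000)
    return f"evaluating {selected_jets} jets"
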
@@ -76,7 +76,7 @@ def get_parser():
return parser.parse_args()
def EvaluateModel(
def evaluate_model(
args: object,
train_config: object,
preprocess_config: object,
@@ -148,18 +148,18 @@ def EvaluateModel(
tagger_names = []
tagger_preds = []
# Set number of nJets for testing
nJets = int(eval_params["n_jets"]) if not args.nJets else args.nJets
# Set number of n_jets for testing
n_jets = int(eval_params["n_jets"]) if not args.nJets else args.nJets
# Check the config if the trained model is also to be evaluated
try:
Eval_model_bool = train_config.evaluate_trained_model
eval_model_bool = train_config.evaluate_trained_model
except AttributeError:
Eval_model_bool = True
eval_model_bool = True
# Set epoch to use for evaluation of trained model or dummy value if
# tagger scores from derivations should be used
epoch = args.epoch if Eval_model_bool else 0
epoch = args.epoch if eval_model_bool else 0
# Test if multiple taggers are given or not
tagger_list = (
@@ -178,7 +178,7 @@ def EvaluateModel(
) from error
# evaluate trained model file (for evaluate_trained_model: True in config)
if Eval_model_bool:
if eval_model_bool:
if epoch is None:
raise ValueError("You need to give an epoch which is to be evaluated!")
@@ -196,45 +196,45 @@ def EvaluateModel(
# Check which test files need to be loaded depending on the CADS version
if tagger.casefold() == "umami_cond_att".casefold():
# Load the test jets
X_test, X_test_trk, _ = utt.GetTestFile(
x_test, x_test_trk, _ = utt.GetTestFile(
input_file=test_file,
var_dict=train_config.var_dict,
preprocess_config=preprocess_config,
class_labels=class_labels,
tracks_name=tracks_name,
nJets=nJets,
nJets=n_jets,
exclude=exclude,
cut_vars_dict=var_cuts,
print_logger=False,
)
# Form the inputs for the network
X = [
X_test_trk,
X_test[
x_comb = [
x_test_trk,
x_test[
[
global_config.etavariable,
global_config.pTvariable,
]
],
X_test,
x_test,
]
else:
# Get the testfile with the needed configs
X_test, X_test_trk, _ = utt.GetTestFile(
x_test, x_test_trk, _ = utt.GetTestFile(
input_file=test_file,
var_dict=train_config.var_dict,
preprocess_config=preprocess_config,
class_labels=class_labels,
tracks_name=tracks_name,
nJets=nJets,
nJets=n_jets,
exclude=exclude,
cut_vars_dict=var_cuts,
)
# Form the inputs for the network
X = [X_test_trk, X_test]
x_comb = [x_test_trk, x_test]
# Load the model for evaluation. Note: The Sum is needed here!
with CustomObjectScope(
@@ -251,7 +251,7 @@ def EvaluateModel(
model = load_model(model_file)
# Predict the output of the model on the test jets
pred_dips, pred_umami = model.predict(X, batch_size=5000, verbose=0)
pred_dips, pred_umami = model.predict(x_comb, batch_size=5000, verbose=0)
# Fill the tagger_names and tagger_preds
tagger_names = ["dips", "umami"]
@@ -279,7 +279,7 @@ def EvaluateModel(
jets, truth_internal_labels = udt.LoadJetsFromFile(
filepath=test_file,
class_labels=class_labels,
nJets=nJets,
nJets=n_jets,
variables=variables,
cut_vars_dict=var_cuts,
)
@@ -389,7 +389,7 @@ def EvaluateModel(
h5_file.attrs["N_test"] = len(jets)
def EvaluateModelDips(
def evaluate_model_dips(
args: object,
train_config: object,
preprocess_config: object,
@@ -462,8 +462,8 @@ def EvaluateModelDips(
" Please check if you defined them!"
)
# Set number of nJets for testing
nJets = int(eval_params["n_jets"]) if not args.nJets else args.nJets
# Set number of n_jets for testing
n_jets = int(eval_params["n_jets"]) if not args.nJets else args.nJets
# Test if multiple taggers are given or not
if isinstance(eval_params["tagger"], str):
@@ -490,13 +490,13 @@ def EvaluateModelDips(
# Check which test files need to be loaded depending on the CADS version
if tagger.casefold() == "CADS".casefold():
# Load the test jets
X_test, X_test_trk, Y_test = utt.GetTestFile(
x_test, x_test_trk, y_test = utt.GetTestFile(
input_file=test_file,
var_dict=train_config.var_dict,
preprocess_config=preprocess_config,
class_labels=class_labels,
tracks_name=tracks_name,
nJets=nJets,
nJets=n_jets,
cut_vars_dict=var_cuts,
jet_variables=[
global_config.etavariable,
@@ -506,17 +506,17 @@ def EvaluateModelDips(
)
# Form the inputs for the network
X = [X_test_trk, X_test]
x_comb = [x_test_trk, x_test]
else:
# Get the testfile with the needed configs
X, Y_test = utt.get_test_sample_trks(
x_comb, y_test = utt.get_test_sample_trks(
input_file=test_file,
var_dict=train_config.var_dict,
preprocess_config=preprocess_config,
class_labels=class_labels,
tracks_name=tracks_name,
nJets=nJets,
nJets=n_jets,
cut_vars_dict=var_cuts,
)
@@ -537,7 +537,7 @@ def EvaluateModelDips(
# Get predictions from trained model
pred_dips = model.predict(
X,
x_comb,
batch_size=train_config.NN_structure["batch_size"],
verbose=0,
)
@@ -564,7 +564,7 @@ def EvaluateModelDips(
jets, truth_internal_labels = udt.LoadJetsFromFile(
filepath=test_file,
class_labels=class_labels,
nJets=nJets,
nJets=n_jets,
variables=variables,
cut_vars_dict=var_cuts,
)
@@ -671,8 +671,8 @@ def EvaluateModelDips(
saliency_map_dict = uet.GetSaliencyMapDict(
model=model,
model_pred=pred_dips,
X_test=X,
Y_test=Y_test,
X_test=x_comb,
Y_test=y_test,
class_labels=class_labels,
main_class=main_class,
frac_dict=eval_params["frac_values"],
@@ -684,11 +684,11 @@ def EvaluateModelDips(
f"{train_config.model_name}/results/saliency{results_filename_extension}"
f"_{args.epoch}_{data_set_name}.pkl",
"wb",
) as f:
pickle.dump(saliency_map_dict, f)
) as pkl_file:
pickle.dump(saliency_map_dict, pkl_file)
def EvaluateModelDL1(
def evaluate_model_dl1(
args: object,
train_config: object,
preprocess_config: object,
@@ -761,7 +761,7 @@ def EvaluateModelDL1(
raise ValueError("You need to give an epoch which is to be evaluated!")
# Set number of nJets for testing
nJets = int(eval_params["n_jets"]) if not args.nJets else args.nJets
n_jets = int(eval_params["n_jets"]) if not args.nJets else args.nJets
# Test if multiple taggers are given or not
if isinstance(eval_params["tagger"], str):
@@ -788,12 +788,12 @@ def EvaluateModelDL1(
exclude = train_config.config["exclude"]
# Get the testfile with the needed configs
X_test, _ = utt.get_test_sample(
x_test, _ = utt.get_test_sample(
input_file=test_file,
var_dict=train_config.var_dict,
preprocess_config=preprocess_config,
class_labels=class_labels,
nJets=nJets,
nJets=n_jets,
exclude=exclude,
cut_vars_dict=var_cuts,
)
@@ -802,8 +802,8 @@ def EvaluateModelDL1(
model = load_model(model_file)
# Predict the output of the model on the test jets
pred_DL1 = model.predict(
X_test,
pred_dl1 = model.predict(
x_test,
batch_size=train_config.NN_structure["batch_size"],
verbose=0,
)
@@ -846,7 +846,7 @@ def EvaluateModelDL1(
jets, truth_internal_labels = udt.LoadJetsFromFile(
filepath=test_file,
class_labels=class_labels,
nJets=nJets,
nJets=n_jets,
variables=variables,
cut_vars_dict=var_cuts,
)
@@ -856,7 +856,7 @@ def EvaluateModelDL1(
df_discs_dict = uet.GetScoresProbsDict(
jets=jets,
y_true=truth_internal_labels,
tagger_preds=[pred_DL1],
tagger_preds=[pred_dl1],
tagger_names=["DL1"],
tagger_list=tagger_list,
class_labels=class_labels,
@@ -889,7 +889,7 @@ def EvaluateModelDL1(
tagger_rej_dicts = uet.GetRejectionPerEfficiencyDict(
jets=jets,
y_true=truth_internal_labels,
tagger_preds=[pred_DL1],
tagger_preds=[pred_dl1],
tagger_names=["DL1"],
tagger_list=tagger_list,
class_labels=class_labels,
@@ -923,7 +923,7 @@ def EvaluateModelDL1(
tagger_fraction_rej_dict = uet.GetRejectionPerFractionDict(
jets=jets,
y_true=truth_internal_labels,
tagger_preds=[pred_DL1],
tagger_preds=[pred_dl1],
tagger_names=["DL1"],
tagger_list=tagger_list,
class_labels=class_labels,
@@ -950,14 +950,14 @@ def EvaluateModelDL1(
f"{train_config.model_name}/results/"
f"results{results_filename_extension}-rej_per_fractions-{args.epoch}.h5",
"a",
) as f:
f.attrs["N_test"] = len(jets)
) as h5_file:
h5_file.attrs["N_test"] = len(jets)
if args.shapley:
logger.info("Explaining feature importance with SHAPley")
FeatureImportance.ShapleyOneFlavor(
model=model,
test_data=X_test,
test_data=x_test,
model_output=eval_params["shapley"]["model_output"],
feature_sets=eval_params["shapley"]["feature_sets"],
plot_size=eval_params["shapley"]["plot_size"],
@@ -968,7 +968,7 @@ def EvaluateModelDL1(
if eval_params["shapley"]["bool_all_flavor_plot"]:
FeatureImportance.ShapleyAllFlavors(
model=model,
test_data=X_test,
test_data=x_test,
feature_sets=eval_params["shapley"]["feature_sets"],
averaged_sets=eval_params["shapley"]["averaged_sets"],
plot_size=eval_params["shapley"]["plot_size"],
@@ -986,7 +986,7 @@ if __name__ == "__main__":
try:
evaluate_trained_model = training_config.evaluate_trained_model
except AttributeError:
evaluate_trained_model = True
evaluate_trained_model = True # pylint: disable=invalid-name
preprocessing_config = (
Configuration(training_config.preprocess_config)
@@ -1005,7 +1005,7 @@ if __name__ == "__main__":
logger.info(
"No tagger defined. Running evaluation without a freshly trained model!"
)
tagger_name = None
tagger_name = None # pylint: disable=invalid-name
# TODO Change this in python 3.10
if tagger_name == "dl1":
@@ -1014,7 +1014,7 @@ if __name__ == "__main__":
test_file_identifier,
test_file_config,
) in training_config.test_files.items():
EvaluateModelDL1(
evaluate_model_dl1(
args=parser_args,
train_config=training_config,
preprocess_config=preprocessing_config,
@@ -1029,7 +1029,7 @@ if __name__ == "__main__":
test_file_identifier,
test_file_config,
) in training_config.test_files.items():
EvaluateModelDips(
evaluate_model_dips(
args=parser_args,
train_config=training_config,
preprocess_config=preprocessing_config,
@@ -1051,7 +1051,7 @@ if __name__ == "__main__":
test_file_identifier,
test_file_config,
) in training_config.test_files.items():
EvaluateModel(
evaluate_model(
args=parser_args,
train_config=training_config,
preprocess_config=preprocessing_config,
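
In the __main__ block above, module-level assignments such as evaluate_trained_model and tagger_name keep their lowercase names, so the diff silences pylint inline: by default pylint treats any name assigned at module scope as a constant and expects UPPER_CASE, which is what the added "# pylint: disable=invalid-name" comments suppress. A reduced, illustrative sketch of the same situation (the names and values here are hypothetical):

# Illustrative only: pylint's default const-naming-style expects UPPER_CASE for
# module-level assignments, so a lowercase name here raises invalid-name (C0103)
# unless it is disabled inline. Names and values are hypothetical.
if __name__ == "__main__":
    selected_epoch = 42  # pylint: disable=invalid-name
    print(f"Evaluating epoch {selected_epoch}")
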
"""Collection of plotting function for ftag performance plots."""
# pylint: disable=consider-using-f-string
# pylint: disable=consider-using-f-string, invalid-name
# TODO: switch to new plotting API with pep8 conform naming
from umami.configuration import global_config, logger # isort:skip
from collections import OrderedDict
@@ -204,7 +204,7 @@ def GetRejectionPerFractionDict(
continue
# Calculate the rejections for the given tagger
rej_dict_tmp, _ = umt.GetRejection(
rej_dict_tmp, _ = umt.get_rejection(
y_pred=y_pred,
y_true=y_true,
class_labels=class_labels,
@@ -335,7 +335,7 @@ def GetRejectionPerEfficiencyDict(
skipped_taggers.append(tagger)
continue
rej_dict_tmp, disc_cut_dict_tmp = umt.GetRejection(
rej_dict_tmp, disc_cut_dict_tmp = umt.get_rejection(
y_pred=y_pred,
y_true=y_true,
class_labels=class_labels,
@@ -497,7 +497,7 @@ def GetScoresProbsDict(
# Adding scores of the trained network
try:
df_discs_dict[f"disc_{tagger}"] = umt.GetScore(
df_discs_dict[f"disc_{tagger}"] = umt.get_score(
y_pred=y_pred,
class_labels=class_labels_copy,
main_class=main_class,
@@ -560,7 +560,7 @@ def GetSaliencyMapDict(
# Define the last node for the discriminant output
disc = Lambda(
umt.GetScore,
umt.get_score,
output_shape=umt.discriminant_output_shape,
arguments={
"class_labels": class_labels,
@@ -580,7 +580,7 @@ def GetSaliencyMapDict(
nTrks = np.sum(boolMask, axis=-1)
# Get score for the dips prediction
Disc_values = umt.GetScore(
Disc_values = umt.get_score(
y_pred=model_pred,
class_labels=class_labels,
main_class=main_class,
@@ -681,7 +681,7 @@ def RecomputeScore(
shaped_proba = np.transpose(shaped_proba)
# Returns the score
return umt.GetScore(
return umt.get_score(
shaped_proba,
class_labels=model_class_labels,
main_class=main_class,
# flake8: noqa
# pylint: skip-file
from umami.input_vars_tools.PlottingFunctions import (
from umami.input_vars_tools.plotting_functions import (
plot_input_vars_jets,
plot_input_vars_trks,
plot_n_tracks_per_jet,
@@ -236,10 +236,6 @@ def plot_input_vars_trks(
Keyword arguments passed to the plot. You can use all arguments that are
supported by the `histogram_plot` class in the plotting API.
Raises
------
ValueError
If the type of the given binning is not supported.
"""
kwargs = check_kwargs_for_ylabel_and_n_ratio_panel(
@@ -291,36 +287,38 @@ def plot_input_vars_trks(
# Loading track variables
try:
trksVars = variable_config["tracks"]
trks_vars = variable_config["tracks"]
except KeyError:
noNormVars = variable_config["track_train_variables"][datasets_track_names[0]][
"noNormVars"
]
logNormVars = variable_config["track_train_variables"][datasets_track_names[0]][
"logNormVars"
]
jointNormVars = variable_config["track_train_variables"][
no_norm_vars = variable_config["track_train_variables"][
datasets_track_names[0]
]["noNormVars"]
log_norm_vars = variable_config["track_train_variables"][
datasets_track_names[0]
]["logNormVars"]
joint_norm_vars = variable_config["track_train_variables"][
datasets_track_names[0]
]["jointNormVars"]
trksVars = noNormVars + logNormVars + jointNormVars
trks_vars = no_norm_vars + log_norm_vars + joint_norm_vars
# Check for variables in the other
for counter, track_names in enumerate(datasets_track_names):
if counter != 0:
noNormVars_tmp = variable_config["track_train_variables"][track_names][
"noNormVars"
]
logNormVars_tmp = variable_config["track_train_variables"][track_names][
"logNormVars"
]
jointNormVars_tmp = variable_config["track_train_variables"][
no_norm_vars_tmp = variable_config["track_train_variables"][
track_names
]["noNormVars"]
log_norm_vars_tmp = variable_config["track_train_variables"][
track_names
]["logNormVars"]
joint_norm_vars_tmp = variable_config["track_train_variables"][
track_names
]["jointNormVars"]
trksVars_tmp = noNormVars_tmp + logNormVars_tmp + jointNormVars_tmp
trks_vars_tmp = (
no_norm_vars_tmp + log_norm_vars_tmp + joint_norm_vars_tmp
)
for iter_var in trksVars_tmp:
if iter_var not in trksVars:
for iter_var in trks_vars_tmp:
if iter_var not in trks_vars:
logger.warning(
f"Variable {iter_var} of {datasets_labels[counter]} "
f"not in {datasets_labels[0]} track collection. "
@@ -349,7 +347,7 @@ def plot_input_vars_trks(
logger.info(f"Track origin: {track_origin}\n")
# Loop over variables
for var in trksVars:
for var in trks_vars:
if var in bins_dict:
logger.info(f"Plotting {var}...")
@@ -472,11 +470,6 @@ def plot_input_vars_jets(
**kwargs: dict
Keyword arguments passed to the plot. You can use all arguments that are
supported by the `histogram_plot` class in the plotting API.
Raises
------
ValueError
If the type of the given binning is not supported.
"""
kwargs = check_kwargs_for_ylabel_and_n_ratio_panel(
# flake8: noqa
# pylint: skip-file
from umami.metrics.metrics import (
CalcDiscValues,
GetRejection,
GetScore,
calc_disc_values,
calc_eff,
calc_rej,
discriminant_output_shape,
get_gradients,
get_rejection,
get_score,
)
from umami.metrics.tools import eff_err, rej_err
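
The two re-export modules touched here (for umami.input_vars_tools and umami.metrics, shown above) start with "# flake8: noqa" and "# pylint: skip-file" because their only purpose is to re-export names: to the linters such imports look unused (flake8's F401), so the whole file opts out of the checks. A reduced, runnable illustration of the pattern, using standard-library imports as stand-ins for the re-exported umami names:

# flake8: noqa
# pylint: skip-file
# Illustrative only: re-exported names look like unused imports (F401) to
# flake8 and pylint, so this kind of __init__.py is excluded from linting.
from collections import OrderedDict, defaultdict  # stand-ins for re-exported API
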
@@ -20,7 +20,7 @@ except ModuleNotFoundError:
pass
def CalcDiscValues(
def calc_disc_values(
jets_dict: dict,
index_dict: dict,
main_class: str,
@@ -166,7 +166,7 @@ def CalcDiscValues(
return disc_values
def GetScore(
def get_score(
y_pred: np.ndarray,
class_labels: list,
main_class: str,
@@ -299,8 +299,8 @@ def discriminant_output_shape(input_shape: tuple) -> tuple:
def get_gradients(
model: object,
X: np.ndarray,
nJets: int,
arr: np.ndarray,
n_jets: int,
):
"""
Calculating the gradients with respect to the input variables.
@@ -312,9 +312,9 @@ def get_gradients(
----------
model : object
Loaded keras model.
X : numpy.ndarray
arr : numpy.ndarray
Track inputs of the jets.
nJets : int
n_jets : int
Number of jets to be used.
Returns
@@ -330,12 +330,12 @@ def get_gradients(
# Pass in the cts and categorical inputs, as well as the learning phase
# (0 for test mode)
gradients = compute_gradients([X[:nJets], 0])
gradients = compute_gradients([arr[:n_jets], 0])
return gradients[0]
def GetRejection(
def get_rejection(
y_pred: np.ndarray,
y_true: np.ndarray,
class_labels: list,
@@ -493,7 +493,7 @@ def GetRejection(
index_dict.update({f"{class_label}": class_counter})
# Calculate disc score for the main class
disc_scores = CalcDiscValues(
disc_scores = calc_disc_values(
jets_dict=jets_dict,
index_dict=index_dict,
main_class=main_class,
@@ -524,7 +524,7 @@ def GetRejection(
rej_dict[dict_key] = 1 / (
len(
jets_dict[iter_main_class][
CalcDiscValues(
calc_disc_values(
jets_dict=jets_dict,
index_dict=index_dict,
main_class=main_class,
@@ -5,8 +5,8 @@ from umami.configuration import logger
def eff_err(
x: np.ndarray,
N: int,
arr: np.ndarray,
n_counts: int,
suppress_zero_divison_error: bool = False,
norm: bool = False,
) -> np.ndarray:
@@ -14,9 +14,9 @@ def eff_err(
Parameters
----------
x : numpy.array
arr : numpy.array
efficiency values
N : int
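
The final hunks rename the eff_err arguments from x and N to arr and n_counts. For context, the usual binomial uncertainty on an efficiency eff measured from N counts is sqrt(eff * (1 - eff) / N); a stand-alone sketch assuming that formula (not the umami implementation) looks like:

# Minimal sketch, assuming the standard binomial efficiency uncertainty
# sigma = sqrt(eff * (1 - eff) / n_counts); this is not the umami implementation.
import numpy as np


def binomial_eff_err(arr: np.ndarray, n_counts: int) -> np.ndarray:
    """Return the binomial uncertainty for the efficiency values in arr."""
    arr = np.asarray(arr, dtype=float)
    return np.sqrt(arr * (1.0 - arr) / n_counts)


# Example: a 70% efficiency measured from 10 000 jets has an uncertainty of ~0.0046.
print(binomial_eff_err(np.array([0.7]), 10_000))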