From a53d7d9e6f304d1046278569490b7ddd3bbedaab Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:10:30 +0200 Subject: [PATCH 01/28] fix invalid-names from pylint --- .gitlab/workflow/replace_placeholders_in_md.py | 4 ++-- scripts/check_lwtnn-model.py | 4 ++-- scripts/conv_lwtnn_model.py | 4 ++-- scripts/create_lwtnn_vardict.py | 4 ++-- umami/evaluate_model.py | 4 ++-- umami/plot_input_variables.py | 4 ++-- umami/plotting_epoch_performance.py | 4 ++-- umami/plotting_umami.py | 16 ++++++++-------- umami/preprocessing.py | 4 ++-- .../unit/preprocessing/test_preprocessing.py | 4 ++-- umami/train.py | 4 ++-- 11 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.gitlab/workflow/replace_placeholders_in_md.py b/.gitlab/workflow/replace_placeholders_in_md.py index 629adc39..749e9359 100644 --- a/.gitlab/workflow/replace_placeholders_in_md.py +++ b/.gitlab/workflow/replace_placeholders_in_md.py @@ -9,7 +9,7 @@ from shutil import copyfile from subprocess import run -def GetParser(): +def get_parser(): """ Argument parser for example code replacer @@ -236,7 +236,7 @@ def replace_placeholder_with_file_content( def main(): """Main function that is called when executing the script.""" - args = GetParser() + args = get_parser() print(f"Replacing placeholders in the following files: {args.input}") # Process each input file with the replacement function diff --git a/scripts/check_lwtnn-model.py b/scripts/check_lwtnn-model.py index 86b00719..28b3bce9 100644 --- a/scripts/check_lwtnn-model.py +++ b/scripts/check_lwtnn-model.py @@ -15,7 +15,7 @@ from umami.tf_tools import Sum from umami.tools import yaml_loader -def GetParser(): +def get_parser(): """ Argparse option for create_vardict script. @@ -166,7 +166,7 @@ def main(): """ # Get the arguments - args = GetParser() + args = get_parser() # Load the config file eval_config = prepareConfig(args.config) diff --git a/scripts/conv_lwtnn_model.py b/scripts/conv_lwtnn_model.py index 42c8ef1a..d80cb735 100644 --- a/scripts/conv_lwtnn_model.py +++ b/scripts/conv_lwtnn_model.py @@ -9,7 +9,7 @@ from tensorflow.keras.utils import CustomObjectScope # pylint: disable=E0401 from umami.tf_tools import Sum -def GetParser(): +def get_parser(): """ Argparse option for conv_model script. @@ -39,7 +39,7 @@ def GetParser(): def __run(): - args = GetParser() + args = get_parser() with CustomObjectScope({"GlorotUniform": glorot_uniform(), "Sum": Sum}): model = load_model(args.model_file) # get the architecture as a json string diff --git a/scripts/create_lwtnn_vardict.py b/scripts/create_lwtnn_vardict.py index 319327fd..0e19780a 100644 --- a/scripts/create_lwtnn_vardict.py +++ b/scripts/create_lwtnn_vardict.py @@ -7,7 +7,7 @@ from umami.configuration import logger from umami.preprocessing_tools import GetVariableDict -def GetParser(): +def get_parser(): """ Argparse option for create_vardict script. @@ -186,7 +186,7 @@ def GetJetVariables( def __run(): """main part of script generating json file""" - args = GetParser() + args = get_parser() variable_config = GetVariableDict(args.var_dict) if "dips" in args.tagger.lower(): diff --git a/umami/evaluate_model.py b/umami/evaluate_model.py index c74fa75a..526c8e41 100644 --- a/umami/evaluate_model.py +++ b/umami/evaluate_model.py @@ -24,7 +24,7 @@ from umami.preprocessing_tools import Configuration tf.compat.v1.disable_eager_execution() -def GetParser(): +def get_parser(): """ Argument parser for the evaluation script. 
@@ -978,7 +978,7 @@ def EvaluateModelDL1( if __name__ == "__main__": - parser_args = GetParser() + parser_args = get_parser() training_config = utt.Configuration(parser_args.config_file) # Check for evaluation only (= evaluation of tagger scores in files) is used: diff --git a/umami/plot_input_variables.py b/umami/plot_input_variables.py index 9d491ed7..88564604 100644 --- a/umami/plot_input_variables.py +++ b/umami/plot_input_variables.py @@ -14,7 +14,7 @@ from umami.plotting.utils import translate_kwargs from umami.tools import yaml_loader -def GetParser(): +def get_parser(): """ Argument parser for Preprocessing script. @@ -192,7 +192,7 @@ def plot_jets_variables(plot_config, plot_type): if __name__ == "__main__": - args = GetParser() + args = get_parser() if not (args.jets or args.tracks): raise Exception( diff --git a/umami/plotting_epoch_performance.py b/umami/plotting_epoch_performance.py index 2f6c39bf..9cfd2b5f 100644 --- a/umami/plotting_epoch_performance.py +++ b/umami/plotting_epoch_performance.py @@ -13,7 +13,7 @@ from umami.preprocessing_tools import Configuration from umami.train_tools import RunPerformanceCheck -def GetParser(): +def get_parser(): """ Argument parser for Preprocessing script. @@ -197,7 +197,7 @@ def main(args, train_config, preprocess_config): if __name__ == "__main__": - parser_args = GetParser() + parser_args = get_parser() gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/umami/plotting_umami.py b/umami/plotting_umami.py index 0bf45d89..9f56e2a9 100644 --- a/umami/plotting_umami.py +++ b/umami/plotting_umami.py @@ -23,7 +23,7 @@ import umami.evaluation_tools as uet from umami.configuration import logger -def GetParser(): +def get_parser(): """ Argument parser for Preprocessing script. 
@@ -166,10 +166,10 @@ def plot_ROC( eval_epoch = int(eval_params["epoch"]) if "nTest" not in plot_config["plot_settings"].keys(): - nTest_provided = False + n_test_provided = False plot_config["plot_settings"]["nTest"] = [] else: - nTest_provided = True + n_test_provided = True for model_name, model_config in plot_config["models_to_plot"].items(): if print_model: @@ -200,7 +200,7 @@ def plot_ROC( colors.append(model_config["color"]) # nTest is only needed to calculate binomial errors - if not nTest_provided and ( + if not n_test_provided and ( "binomialErrors" in plot_config["plot_settings"] and plot_config["plot_settings"]["binomialErrors"] ): @@ -264,10 +264,10 @@ def plot_ROC_Comparison( eval_epoch = int(eval_params["epoch"]) if "nTest" not in plot_config["plot_settings"].keys(): - nTest_provided = False + n_test_provided = False plot_config["plot_settings"]["nTest"] = [] else: - nTest_provided = True + n_test_provided = True for model_name, model_config in plot_config["models_to_plot"].items(): if print_model: @@ -298,7 +298,7 @@ def plot_ROC_Comparison( colors.append(model_config["color"]) # nTest is only needed to calculate binomial errors - if not nTest_provided and ( + if not n_test_provided and ( "binomialErrors" in plot_config["plot_settings"] and plot_config["plot_settings"]["binomialErrors"] ): @@ -1482,5 +1482,5 @@ def main(args): if __name__ == "__main__": - parser_args = GetParser() + parser_args = get_parser() main(parser_args) diff --git a/umami/preprocessing.py b/umami/preprocessing.py index 48955634..49f2ed44 100644 --- a/umami/preprocessing.py +++ b/umami/preprocessing.py @@ -6,7 +6,7 @@ import umami.preprocessing_tools as upt from umami.configuration import logger -def GetParser(): +def get_parser(): """ Argument parser for Preprocessing script. @@ -104,7 +104,7 @@ def GetParser(): if __name__ == "__main__": - args = GetParser() + args = get_parser() config = upt.Configuration(args.config_file) # Check for preparation diff --git a/umami/tests/unit/preprocessing/test_preprocessing.py b/umami/tests/unit/preprocessing/test_preprocessing.py index 3845f317..4b4fe30a 100644 --- a/umami/tests/unit/preprocessing/test_preprocessing.py +++ b/umami/tests/unit/preprocessing/test_preprocessing.py @@ -4,7 +4,7 @@ import unittest # noqa from unittest import mock from umami.configuration import logger, set_log_level -from umami.preprocessing import GetParser +from umami.preprocessing import get_parser set_log_level(logger, "DEBUG") @@ -35,7 +35,7 @@ class PreprocessingTestParser(unittest.TestCase): ), ) def test_Parser(self, mock_args): - parser = GetParser() + parser = get_parser() self.assertEqual(parser.config_file, self.config_file) self.assertFalse(parser.tracks) diff --git a/umami/train.py b/umami/train.py index 93a76268..acc9da6e 100644 --- a/umami/train.py +++ b/umami/train.py @@ -9,7 +9,7 @@ import umami.preprocessing_tools as upt import umami.train_tools as utt -def GetParser(): +def get_parser(): """ Argument parser for the train executable. 
@@ -49,7 +49,7 @@ def GetParser(): if __name__ == "__main__": # Get the args from parser - args = GetParser() + args = get_parser() # Check if GPUs are available gpus = tf.config.experimental.list_physical_devices("GPU") -- GitLab From 7edc9f9b57063c19b7b9bd2491e30bd5a3d4a598 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:13:46 +0200 Subject: [PATCH 02/28] replace Error with error --- umami/data_tools/Cuts.py | 8 +++---- umami/evaluate_model.py | 4 ++-- umami/metrics/metrics.py | 4 ++-- umami/preprocessing_tools/Preparation.py | 8 +++---- .../resampling/count_sampling.py | 4 ++-- .../resampling/pdf_sampling.py | 4 ++-- .../resampling/resampling_base.py | 4 ++-- umami/preprocessing_tools/utils.py | 12 +++++----- .../tests/integration/test_plotting_umami.py | 4 ++-- umami/tests/integration/test_preprocessing.py | 24 +++++++++---------- umami/tests/integration/test_train.py | 12 +++++----- umami/tf_tools/load_tfrecord.py | 8 +++---- 12 files changed, 48 insertions(+), 48 deletions(-) diff --git a/umami/data_tools/Cuts.py b/umami/data_tools/Cuts.py index bea1eba1..e7b69cbf 100644 --- a/umami/data_tools/Cuts.py +++ b/umami/data_tools/Cuts.py @@ -91,19 +91,19 @@ def GetSampleCuts(jets: np.ndarray, cuts: list) -> np.ndarray: found = re.search(r"mod_(\d+?)_([=!><]+)", op) modulo = int(found.group(1)) op = found.group(2) - except AttributeError as Error: + except AttributeError as error: raise RuntimeError( "Incorrect use of modulo cut for sample: " " specify in config as mod_N_op with" " N as an integer and op the operator" " used for testing the condition." - ) from Error - except KeyError as Error: + ) from error + except KeyError as error: raise RuntimeError( "Incorrect use of modulo cut for sample: " " only supported operators 'op' in mod_N_op are: " f" {list(inverted_ops.keys())}." - ) from Error + ) from error cut_rejection = inverted_ops[op]((jets[cut] % modulo), cond) else: if op in list(inverted_ops.keys()): # pylint: disable=C0201: diff --git a/umami/evaluate_model.py b/umami/evaluate_model.py index 526c8e41..374d2494 100644 --- a/umami/evaluate_model.py +++ b/umami/evaluate_model.py @@ -169,13 +169,13 @@ def EvaluateModel( ) try: assert isinstance(tagger_list, list) - except AssertionError as Error: + except AssertionError as error: raise ValueError( """ Tagger given in Eval_parameters_validation is not a string or a list! """ - ) from Error + ) from error # evaluate trained model file (for evaluate_trained_model: True in config) if Eval_model_bool: diff --git a/umami/metrics/metrics.py b/umami/metrics/metrics.py index a4ce70d6..645991cf 100644 --- a/umami/metrics/metrics.py +++ b/umami/metrics/metrics.py @@ -537,12 +537,12 @@ def GetRejection( / (len(jets_dict[iter_main_class]) + 1e-10) ) - except ZeroDivisionError as Error: + except ZeroDivisionError as error: raise ZeroDivisionError( "Not enough jets for rejection calculation of class " f"{iter_main_class} for {target_eff} efficiency!\n" "Maybe loosen the eff_min to fix it or give more jets!" 
- ) from Error + ) from error return rej_dict, cutvalue diff --git a/umami/preprocessing_tools/Preparation.py b/umami/preprocessing_tools/Preparation.py index 30021f83..4fbf6035 100644 --- a/umami/preprocessing_tools/Preparation.py +++ b/umami/preprocessing_tools/Preparation.py @@ -79,8 +79,8 @@ class PrepareSamples: samples = self.config.preparation["samples"] try: sample = samples[args.sample] - except KeyError as Error: - raise KeyError(f'sample "{args.sample}" not in config file!') from Error + except KeyError as error: + raise KeyError(f'sample "{args.sample}" not in config file!') from error self.sample_type = sample.get("type") self.sample_category = sample.get("category") @@ -90,11 +90,11 @@ class PrepareSamples: else: try: category_setup = global_config.flavour_categories[self.sample_category] - except KeyError as Error: + except KeyError as error: raise KeyError( f"Requested sample category {self.sample_category} not" " defined in global config." - ) from Error + ) from error # retrieving the cuts for the category selection category_cuts = GetCategoryCuts( diff --git a/umami/preprocessing_tools/resampling/count_sampling.py b/umami/preprocessing_tools/resampling/count_sampling.py index bfa651ef..07b31473 100644 --- a/umami/preprocessing_tools/resampling/count_sampling.py +++ b/umami/preprocessing_tools/resampling/count_sampling.py @@ -285,12 +285,12 @@ class ProbabilityRatioUnderSampling(UnderSampling): try: target_distribution = self.options["target_distribution"] - except KeyError as Error: + except KeyError as error: raise ValueError( "Resampling method probabilty_ratio requires a target" " distribution class in the options block of the configuration" " file (i.e. bjets, cjets, ujets)." - ) from Error + ) from error self.ConcatenateSamples() diff --git a/umami/preprocessing_tools/resampling/pdf_sampling.py b/umami/preprocessing_tools/resampling/pdf_sampling.py index 69469c8f..90cfabca 100644 --- a/umami/preprocessing_tools/resampling/pdf_sampling.py +++ b/umami/preprocessing_tools/resampling/pdf_sampling.py @@ -591,12 +591,12 @@ class PDFSampling(Resampling): # pylint: disable=too-many-public-methods try: samples = self.options["samples"] - except KeyError as Error: + except KeyError as error: raise KeyError( "You chose the 'pdf' option for the sampling but didn't" "provide the samples to use. Please specify them in the" "configuration file!" - ) from Error + ) from error # saving a list of sample categories with associated IDs self.sample_categories = { diff --git a/umami/preprocessing_tools/resampling/resampling_base.py b/umami/preprocessing_tools/resampling/resampling_base.py index 2e1fc7d9..ab69e421 100644 --- a/umami/preprocessing_tools/resampling/resampling_base.py +++ b/umami/preprocessing_tools/resampling/resampling_base.py @@ -807,12 +807,12 @@ class ResamplingTools(Resampling): self.samples = {} try: samples = self.options["samples"] - except KeyError as Error: + except KeyError as error: raise KeyError( "You chose the 'count' or 'probability_ratio' option " "for the sampling but didn't provide the samples to use. " "Please specify them in the configuration file!" 
- ) from Error + ) from error # list of sample classes, bjets, cjets, etc valid_class_categories = self.GetValidClassCategories(samples) diff --git a/umami/preprocessing_tools/utils.py b/umami/preprocessing_tools/utils.py index ae3d4fb4..7c3b0116 100644 --- a/umami/preprocessing_tools/utils.py +++ b/umami/preprocessing_tools/utils.py @@ -188,7 +188,7 @@ def plot_variable( else 50, ) - except IndexError as Error: + except IndexError as error: if var_type.casefold() == "jets": array = np.nan_to_num(df[:, variable_index]) @@ -198,7 +198,7 @@ def plot_variable( else: raise TypeError( f"Variable type {var_type} not supported! Only jets and tracks!" - ) from Error + ) from error _, bins = np.histogram( a=array, @@ -221,7 +221,7 @@ def plot_variable( except AttributeError: flavour_jets = df[variable][labels[:, flav_counter] == 1] - except IndexError as Error: + except IndexError as error: if var_type.casefold() == "jets": flavour_jets = df[:, variable_index][ labels[:, flav_counter] == 1 @@ -233,7 +233,7 @@ def plot_variable( else: raise TypeError( f"Variable type {var_type} not supported! Only jets and tracks!" - ) from Error + ) from error # Calculate bins hist_bins, weights, unc, band = hist_w_unc( @@ -735,13 +735,13 @@ def generate_process_tag( combined_sample = True processes += f" + {label}" logger.info(f"Found the process '{process}' with the label '{label}'") - except KeyError as Error: + except KeyError as error: raise KeyError( f"Plot label for the process {process} was not" "found. Make sure your entries in the 'ntuples'" "section are valid entries that have a matching entry" "in the global config." - ) from Error + ) from error # Combine the string that contains the latex code for the processes # and the "sqrt(s)..." and "PFlow Jets" part if combined_sample is True: diff --git a/umami/tests/integration/test_plotting_umami.py b/umami/tests/integration/test_plotting_umami.py index 37dc6bf4..03f88ed1 100644 --- a/umami/tests/integration/test_plotting_umami.py +++ b/umami/tests/integration/test_plotting_umami.py @@ -86,8 +86,8 @@ def runPlotting(config, tagger): try: run_plotting_umami.check_returncode() - except CalledProcessError as Error: - raise AssertionError(f"Test failed: plotting_umami.py for {tagger}.") from Error + except CalledProcessError as error: + raise AssertionError(f"Test failed: plotting_umami.py for {tagger}.") from error return True diff --git a/umami/tests/integration/test_preprocessing.py b/umami/tests/integration/test_preprocessing.py index 2ea3d705..a47a3d2f 100644 --- a/umami/tests/integration/test_preprocessing.py +++ b/umami/tests/integration/test_preprocessing.py @@ -107,10 +107,10 @@ def runPreprocessing(config: dict, tagger: str, method: str) -> bool: try: run_prepare.check_returncode() - except CalledProcessError as Error: + except CalledProcessError as error: raise AssertionError( f"Test failed: preprocessing.py --prepare: {sample}" - ) from Error + ) from error logger.info("Test: running the resampling...") run_resampling = run( @@ -127,8 +127,8 @@ def runPreprocessing(config: dict, tagger: str, method: str) -> bool: try: run_resampling.check_returncode() - except CalledProcessError as Error: - raise AssertionError("Test failed: preprocessing.py --resampling.") from Error + except CalledProcessError as error: + raise AssertionError("Test failed: preprocessing.py --resampling.") from error logger.info("Test: retrieving scaling and shifting factors...") run_scaling = run( @@ -147,8 +147,8 @@ def runPreprocessing(config: dict, tagger: str, method: str) -> 
bool: try: run_scaling.check_returncode() - except CalledProcessError as Error: - raise AssertionError("Test failed: preprocessing.py --scaling.") from Error + except CalledProcessError as error: + raise AssertionError("Test failed: preprocessing.py --scaling.") from error logger.info("Test: applying shifting and scaling factors...") run_apply_scales = run( @@ -164,8 +164,8 @@ def runPreprocessing(config: dict, tagger: str, method: str) -> bool: try: run_apply_scales.check_returncode() - except CalledProcessError as Error: - raise AssertionError("Test failed: preprocessing.py --apply_scales.") from Error + except CalledProcessError as error: + raise AssertionError("Test failed: preprocessing.py --apply_scales.") from error logger.info("Test: shuffling the samples and writing the samples to disk...") run_write = run( @@ -182,8 +182,8 @@ def runPreprocessing(config: dict, tagger: str, method: str) -> bool: try: run_write.check_returncode() - except CalledProcessError as Error: - raise AssertionError("Test failed: preprocessing.py --write.") from Error + except CalledProcessError as error: + raise AssertionError("Test failed: preprocessing.py --write.") from error logger.info( "Test: shuffling the samples, writing the samples to disk and convert" @@ -205,10 +205,10 @@ def runPreprocessing(config: dict, tagger: str, method: str) -> bool: try: run_record.check_returncode() - except CalledProcessError as Error: + except CalledProcessError as error: raise AssertionError( "Test failed: preprocessing.py --to_records." - ) from Error + ) from error tagger_path = f"./test_preprocessing_{tagger}/" if not os.path.isdir(tagger_path): diff --git a/umami/tests/integration/test_train.py b/umami/tests/integration/test_train.py index 0d4253f9..92272054 100644 --- a/umami/tests/integration/test_train.py +++ b/umami/tests/integration/test_train.py @@ -271,8 +271,8 @@ def runTraining(config: dict, tagger: str) -> bool: try: run_train.check_returncode() - except CalledProcessError as Error: - raise AssertionError(f"Test failed: train.py for {tagger}.") from Error + except CalledProcessError as error: + raise AssertionError(f"Test failed: train.py for {tagger}.") from error logger.info(f"Test: running plotting_epoch_performance.py for {tagger}...") @@ -302,10 +302,10 @@ def runTraining(config: dict, tagger: str) -> bool: try: run_plot_epoch.check_returncode() - except CalledProcessError as Error: + except CalledProcessError as error: raise AssertionError( f"Test failed: plotting_epoch_performance.py for {tagger}." - ) from Error + ) from error logger.info(f"Test: running evaluate_model.py for {tagger}...") run_evaluate_model = run( @@ -323,8 +323,8 @@ def runTraining(config: dict, tagger: str) -> bool: try: run_evaluate_model.check_returncode() - except CalledProcessError as Error: - raise AssertionError(f"Test failed: evaluate_model.py for {tagger}.") from Error + except CalledProcessError as error: + raise AssertionError(f"Test failed: evaluate_model.py for {tagger}.") from error return True diff --git a/umami/tf_tools/load_tfrecord.py b/umami/tf_tools/load_tfrecord.py index edab1408..950fea38 100644 --- a/umami/tf_tools/load_tfrecord.py +++ b/umami/tf_tools/load_tfrecord.py @@ -215,10 +215,10 @@ class TFRecordReader: shape=shapes[f"shape_X_{self.tracks_name}_train"], dtype=tf.float32 ) - except KeyError as Error: + except KeyError as error: raise KeyError( f"Track collection {self.tracks_name} not in metadata file!" 
- ) from Error + ) from error # Set label shape shapes["shape_Y"] = [metadata["n_dim"]] @@ -235,10 +235,10 @@ class TFRecordReader: shape=shapes["shape_Add_Vars"], dtype=tf.float32 ) - except KeyError as Error: + except KeyError as error: raise KeyError( "No conditional information saved in tfrecords metadata file!" - ) from Error + ) from error # Get the parser parse_ex = tf.io.parse_example(record_bytes, features) # pylint: disable=E1120 -- GitLab From 7bd1a324c9a5210e09cbd831a4f5a613ab7bbe79 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:18:09 +0200 Subject: [PATCH 03/28] small fixes --- docs/setup/development/good_practices_code.md | 10 +++++----- umami/plotting_umami.py | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/setup/development/good_practices_code.md b/docs/setup/development/good_practices_code.md index 7ecf0318..3e29cf25 100644 --- a/docs/setup/development/good_practices_code.md +++ b/docs/setup/development/good_practices_code.md @@ -108,12 +108,12 @@ Since Python 3.6 the so-called f-strings were introduced providing a powerful sy A simple example ```python -nJets = 2_300 +n_jets = 2_300 jet_collection = "EMPFlow" -info_text = f"We are using the {jet_collection} jet collection and have {nJets} available." +info_text = f"We are using the {jet_collection} jet collection and have {n_jets} available." ## arbitrary operations -info_text_event = f"We are using the {jet_collection} jet collection and have {nJets * 4} available." +info_text_event = f"We are using the {jet_collection} jet collection and have {n_jets * 4} available." ``` ### Integer division @@ -122,10 +122,10 @@ In Python 3 a dedicated integer division was introduced. ```python # standard division -> returns by default a flaot (no rounding) -nEvents = nJets / 4 +n_events = n_jets / 4 # integer division -> rounds to integer precision -nEvents = nJets // 4 +n_events = n_jets // 4 ``` ### Type declaration in functions diff --git a/umami/plotting_umami.py b/umami/plotting_umami.py index 9f56e2a9..bd69e2b8 100644 --- a/umami/plotting_umami.py +++ b/umami/plotting_umami.py @@ -132,7 +132,7 @@ def plot_probability_comparison( ) -def plot_ROC( +def plot_roc( plot_name: str, plot_config: dict, eval_params: dict, @@ -230,7 +230,7 @@ def plot_ROC( ) -def plot_ROC_Comparison( +def plot_roc_Comparison( plot_name: str, plot_config: dict, eval_params: dict, @@ -334,7 +334,7 @@ def plot_ROC_Comparison( ) -def plot_ROCvsVar( +def plot_roc_vs_var( plot_name: str, plot_config: dict, eval_params: dict, @@ -564,7 +564,7 @@ def plot_ROCvsVar( ) -def plot_ROCvsVar_comparison( +def plot_roc_vs_var_comparison( plot_name: str, plot_config: dict, eval_params: dict, @@ -1340,7 +1340,7 @@ def SetUpPlots( ) elif plot_config["type"] == "ROC": - plot_ROC( + plot_roc( plot_name=save_plot_to, plot_config=plot_config, eval_params=eval_params, @@ -1349,7 +1349,7 @@ def SetUpPlots( ) elif plot_config["type"] == "ROC_Comparison": - plot_ROC_Comparison( + plot_roc_Comparison( plot_name=save_plot_to, plot_config=plot_config, eval_params=eval_params, @@ -1417,7 +1417,7 @@ def SetUpPlots( ) elif plot_config["type"] == "ROCvsVar": - plot_ROCvsVar( + plot_roc_vs_var( plot_name=save_plot_to, plot_config=plot_config, eval_params=eval_params, @@ -1425,7 +1425,7 @@ def SetUpPlots( ) elif plot_config["type"] == "ROCvsVar_comparison": - plot_ROCvsVar_comparison( + plot_roc_vs_var_comparison( plot_name=save_plot_to, plot_config=plot_config, eval_params=eval_params, -- GitLab From 
a8f1d578d97936b3beb20fe72f0e645bf325bb1f Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:23:32 +0200 Subject: [PATCH 04/28] rename getConfiguration to get_configuration --- umami/tests/integration/test_input_vars_plot.py | 4 ++-- umami/tests/integration/test_plotting_umami.py | 4 ++-- umami/tests/integration/test_preprocessing.py | 4 ++-- umami/tests/integration/test_train.py | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/umami/tests/integration/test_input_vars_plot.py b/umami/tests/integration/test_input_vars_plot.py index c7a7bc20..5dbc3860 100644 --- a/umami/tests/integration/test_input_vars_plot.py +++ b/umami/tests/integration/test_input_vars_plot.py @@ -13,7 +13,7 @@ from umami.tools import yaml_loader set_log_level(logger, "DEBUG") -def getConfiguration() -> object: +def get_configuration() -> object: """ Load yaml file with settings for integration test of the input vars plotting. @@ -98,7 +98,7 @@ class TestInput_Vars_Plotting(unittest.TestCase): def setUp(self): """Download test files for input var plots.""" # Get test configuration - self.data = getConfiguration() + self.data = get_configuration() test_dir = os.path.join(self.data["test_input_vars_plot"]["testdir"]) logging.info(f"Creating test directory in {test_dir}") diff --git a/umami/tests/integration/test_plotting_umami.py b/umami/tests/integration/test_plotting_umami.py index 03f88ed1..39d3fa07 100644 --- a/umami/tests/integration/test_plotting_umami.py +++ b/umami/tests/integration/test_plotting_umami.py @@ -19,7 +19,7 @@ from umami.tools import replaceLineInFile, yaml_loader set_log_level(logger, "DEBUG") -def getConfiguration(): +def get_configuration(): """ Load yaml file with settings for integration test of dips training. @@ -101,7 +101,7 @@ class TestPlottingUmami(unittest.TestCase): def setUp(self): """Download test files for running the dips training.""" # Get test configuration - self.data = getConfiguration() + self.data = get_configuration() self.model_name_dips = self.data["test_dips"]["model_name"] self.model_name_umami = self.data["test_umami"]["model_name"] self.model_name_dl1r = self.data["test_dl1r"]["model_name"] diff --git a/umami/tests/integration/test_preprocessing.py b/umami/tests/integration/test_preprocessing.py index a47a3d2f..72440a7c 100644 --- a/umami/tests/integration/test_preprocessing.py +++ b/umami/tests/integration/test_preprocessing.py @@ -17,7 +17,7 @@ from umami.tools import replaceLineInFile, yaml_loader set_log_level(logger, "DEBUG") -def getConfiguration(): +def get_configuration(): """ Load yaml file with settings for integration test of preprocessing. @@ -280,7 +280,7 @@ class TestPreprocessing(unittest.TestCase): preprocessing config file. """ # Get test configuration - self.data = getConfiguration() + self.data = get_configuration() test_dir = os.path.join(self.data["test_preprocessing"]["testdir"]) logger.info(f"Creating test directory in {test_dir}") diff --git a/umami/tests/integration/test_train.py b/umami/tests/integration/test_train.py index 92272054..23cc58fb 100644 --- a/umami/tests/integration/test_train.py +++ b/umami/tests/integration/test_train.py @@ -18,7 +18,7 @@ from umami.tools import replaceLineInFile, yaml_loader set_log_level(logger, "DEBUG") -def getConfiguration(): +def get_configuration(): """ Load yaml file with settings for integration test of dips training. 
@@ -74,7 +74,7 @@ def prepareConfig( """ # Get test configuration - data = getConfiguration() + data = get_configuration() if tagger != "evaluate_comp_taggers": # For CADS, the files from the umami preprocessing are used. @@ -338,7 +338,7 @@ class TestTraining(unittest.TestCase): def setUp(self): """Download test files for running the dips training.""" # Get test configuration - self.data = getConfiguration() + self.data = get_configuration() self.test_dir_path = tempfile.TemporaryDirectory() # pylint: disable=R1732 self.test_dir = f"{self.test_dir_path.name}" -- GitLab From fffd2c6b6aed43b3640d58aa44981470879eaf31 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:25:30 +0200 Subject: [PATCH 05/28] change chunkSize to chunk_size --- umami/preprocessing.py | 2 +- umami/preprocessing_tools/Scaling.py | 42 +++++++++---------- .../preprocessing_tools/Writing_Train_File.py | 16 +++---- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/umami/preprocessing.py b/umami/preprocessing.py index 49f2ed44..1e022f54 100644 --- a/umami/preprocessing.py +++ b/umami/preprocessing.py @@ -162,7 +162,7 @@ if __name__ == "__main__": # Calculate the scale dicts of the previous resampled files elif args.scaling: Scaling = upt.Scaling(config) - Scaling.GetScaleDict(chunkSize=args.chunk_size) + Scaling.GetScaleDict(chunk_size=args.chunk_size) # Apply scaling of the previous calculated scale dicts elif args.apply_scales: diff --git a/umami/preprocessing_tools/Scaling.py b/umami/preprocessing_tools/Scaling.py index 0282c927..447c670b 100644 --- a/umami/preprocessing_tools/Scaling.py +++ b/umami/preprocessing_tools/Scaling.py @@ -478,7 +478,7 @@ class Scaling: def GetScaleDict( self, input_file: str = None, - chunkSize: int = 1e5, + chunk_size: int = 1e5, ): """ Calculates the scaling, shifting and default values and saves them to json. @@ -487,7 +487,7 @@ class Scaling: ---------- input_file : str, optional File which is used to calculate scaling/shifting, by default None - chunkSize : int, optional + chunk_size : int, optional Scale dict calculated using the given file, by default 1e5 """ @@ -506,13 +506,13 @@ class Scaling: file_length = len(h5py.File(input_file, "r")["/jets"].fields(var_list[0])[:]) # Get the number of chunks we need to load - n_chunks = int(np.ceil(file_length / chunkSize)) + n_chunks = int(np.ceil(file_length / chunk_size)) # Get the jets scaling generator jets_scaling_generator = self.get_scaling_generator( input_file=input_file, nJets=file_length, - chunkSize=chunkSize, + chunk_size=chunk_size, ) # Loop over chunks @@ -552,7 +552,7 @@ class Scaling: input_file=input_file, nJets=file_length, tracks_name=tracks_name, - chunkSize=chunkSize, + chunk_size=chunk_size, ) # Loop over chunks @@ -598,7 +598,7 @@ class Scaling: self, input_file: str, nJets: int, - chunkSize: int = int(10000), + chunk_size: int = int(10000), ): """ Set up a generator that loads the jets in chunks and calculates the mean/std. @@ -609,7 +609,7 @@ class Scaling: File which is to be scaled. nJets : int Number of jets which are to be scaled. 
- chunkSize : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default int(10000) @@ -634,7 +634,7 @@ class Scaling: # Loop over indicies while start_ind < nJets: # Calculate end index of the chunk - end_ind = int(start_ind + chunkSize) + end_ind = int(start_ind + chunk_size) # Check if end index is bigger than Njets end_ind = min(end_ind, nJets) @@ -661,7 +661,7 @@ class Scaling: jets.replace([np.inf, -np.inf], np.nan, inplace=True) if "weight" not in jets: - length = nJets if nJets < chunkSize else len(jets) + length = nJets if nJets < chunk_size else len(jets) jets["weight"] = np.ones(int(length)) # Iterate over the vars of the jets @@ -698,7 +698,7 @@ class Scaling: input_file: str, nJets: int, tracks_name: str, - chunkSize: int = int(10000), + chunk_size: int = int(10000), ): """ Set up a generator that loads the tracks in chunks and calculates the mean/std. @@ -712,7 +712,7 @@ class Scaling: Number of jets which are to be scaled. tracks_name : str Name of the tracks - chunkSize : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default int(10000) @@ -743,7 +743,7 @@ class Scaling: # Loop over indicies while start_ind < nJets: # Calculate end index of the chunk - end_ind = int(start_ind + chunkSize) + end_ind = int(start_ind + chunk_size) # Check if end index is bigger than Njets end_ind = min(end_ind, nJets) @@ -797,7 +797,7 @@ class Scaling: jets_default_dict: dict, nJets: int, tracks_scale_dict: dict = None, - chunkSize: int = int(10000), + chunk_size: int = int(10000), ): """ Set up a generator who applies the scaling/shifting for the given @@ -817,7 +817,7 @@ class Scaling: Number of jets which are to be scaled. tracks_scale_dict : dict, optional Scale dict of the track variables., by default None - chunkSize : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default int(10000) @@ -842,11 +842,11 @@ class Scaling: start_ind = 0 tupled_indices = [] while start_ind < nJets: - end_ind = int(start_ind + chunkSize) + end_ind = int(start_ind + chunk_size) end_ind = min(end_ind, nJets) tupled_indices.append((start_ind, end_ind)) start_ind = end_ind - end_ind = int(start_ind + chunkSize) + end_ind = int(start_ind + chunk_size) for index_tuple in tupled_indices: @@ -854,7 +854,7 @@ class Scaling: jets = pd.DataFrame(f["/jets"][index_tuple[0] : index_tuple[1]]) labels = pd.DataFrame(f["/labels"][index_tuple[0] : index_tuple[1]]) if "weight" not in jets: - length = nJets if nJets < chunkSize else len(jets) + length = nJets if nJets < chunk_size else len(jets) jets["weight"] = np.ones(int(length)) if "weight" not in jets_variables: @@ -910,7 +910,7 @@ class Scaling: def ApplyScales( self, input_file: str = None, - chunkSize: int = 1e6, + chunk_size: int = 1e6, ): """ Apply the scaling and shifting. 
@@ -919,7 +919,7 @@ class Scaling: ---------- input_file : str, optional File which is to be scaled., by default None - chunkSize : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default 1e6 """ @@ -939,7 +939,7 @@ class Scaling: file_length = len(h5py.File(input_file, "r")["/jets"][jets_variables[0]][:]) - n_chunks = int(np.ceil(file_length / chunkSize)) + n_chunks = int(np.ceil(file_length / chunk_size)) # Get scale dict with open(self.scale_dict_path, "r") as infile: @@ -968,7 +968,7 @@ class Scaling: jets_default_dict=jets_default_dict, nJets=file_length, tracks_scale_dict=tracks_scale_dict, - chunkSize=chunkSize, + chunk_size=chunk_size, ) logger.info("Applying scaling and shifting.") diff --git a/umami/preprocessing_tools/Writing_Train_File.py b/umami/preprocessing_tools/Writing_Train_File.py index 0c1eac27..ab837048 100644 --- a/umami/preprocessing_tools/Writing_Train_File.py +++ b/umami/preprocessing_tools/Writing_Train_File.py @@ -44,7 +44,7 @@ class TrainSampleWriter: input_file: str, index: list, nJets: int, - chunkSize: int = 100_000, + chunk_size: int = 100_000, ): """ Set up a generator who loads the scaled file and save it in the format for @@ -58,7 +58,7 @@ class TrainSampleWriter: List with the indicies. nJets : int Number of jets used. - chunkSize : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default 100_000 @@ -84,12 +84,12 @@ class TrainSampleWriter: tupled_indices = [] while start_ind < nJets: - end_ind = int(start_ind + chunkSize) + end_ind = int(start_ind + chunk_size) end_ind = min(end_ind, nJets) tupled_indices.append((start_ind, end_ind)) start_ind = end_ind - end_ind = int(start_ind + chunkSize) + end_ind = int(start_ind + chunk_size) for index_tuple in tupled_indices: @@ -181,7 +181,7 @@ class TrainSampleWriter: self, input_file: str = None, output_file: str = None, - chunkSize: int = 100_000, + chunk_size: int = 100_000, ) -> None: """ Write the training file. @@ -194,7 +194,7 @@ class TrainSampleWriter: output_file : str, optional Name of the output file. 
Default is name from config + resampled_scaled_shuffled., by default None - chunkSize : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default 100_000 """ @@ -223,7 +223,7 @@ class TrainSampleWriter: n_jets = len(h5py.File(input_file, "r")["/jets"]) # Get the number of chunks that need to be processed - n_chunks = int(np.ceil(n_jets / chunkSize)) + n_chunks = int(np.ceil(n_jets / chunk_size)) # Create an absolute index list for the file and shuffle it absolute_index = np.arange(n_jets) @@ -233,7 +233,7 @@ class TrainSampleWriter: input_file=input_file, index=absolute_index, nJets=n_jets, - chunkSize=chunkSize, + chunk_size=chunk_size, ) logger.info(f"Saving final train files to {out_file}") -- GitLab From 937fc5f431d3866f4be494bb9f04036ebbdaace5 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:33:12 +0200 Subject: [PATCH 06/28] small naming adaptions --- umami/evaluate_model.py | 20 ++++++++++---------- umami/plotting_umami.py | 8 ++++---- umami/tf_tools/load_tfrecord.py | 26 +++++++++++++------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/umami/evaluate_model.py b/umami/evaluate_model.py index 374d2494..1f5f09a2 100644 --- a/umami/evaluate_model.py +++ b/umami/evaluate_model.py @@ -351,8 +351,8 @@ def EvaluateModel( f"{train_config.model_name}/results/" f"results{results_filename_extension}-rej_per_eff-{epoch}.h5", "a", - ) as f: - f.attrs["N_test"] = len(jets) + ) as h5_file: + h5_file.attrs["N_test"] = len(jets) # Get the rejections, discs and f_* values for the taggers tagger_fraction_rej_dict = uet.GetRejectionPerFractionDict( @@ -385,8 +385,8 @@ def EvaluateModel( f"{train_config.model_name}/results/" f"results{results_filename_extension}-rej_per_fractions-{args.epoch}.h5", "a", - ) as f: - f.attrs["N_test"] = len(jets) + ) as h5_file: + h5_file.attrs["N_test"] = len(jets) def EvaluateModelDips( @@ -627,8 +627,8 @@ def EvaluateModelDips( f"{train_config.model_name}/results/" f"results{results_filename_extension}-rej_per_eff-{args.epoch}.h5", "a", - ) as f: - f.attrs["N_test"] = len(jets) + ) as h5_file: + h5_file.attrs["N_test"] = len(jets) # Get the rejections, discs and f_* values for the taggers tagger_fraction_rej_dict = uet.GetRejectionPerFractionDict( @@ -660,8 +660,8 @@ def EvaluateModelDips( f"{train_config.model_name}/results/" f"results{results_filename_extension}-rej_per_fractions-{args.epoch}.h5", "a", - ) as f: - f.attrs["N_test"] = len(jets) + ) as h5_file: + h5_file.attrs["N_test"] = len(jets) if ( "Calculate_Saliency" in eval_params @@ -916,8 +916,8 @@ def EvaluateModelDL1( f"{train_config.model_name}/results/" f"results{results_filename_extension}-rej_per_eff-{args.epoch}.h5", "a", - ) as f: - f.attrs["N_test"] = len(jets) + ) as h5_file: + h5_file.attrs["N_test"] = len(jets) # Get the rejections, discs and f_* values for the taggers tagger_fraction_rej_dict = uet.GetRejectionPerFractionDict( diff --git a/umami/plotting_umami.py b/umami/plotting_umami.py index bd69e2b8..45b7931c 100644 --- a/umami/plotting_umami.py +++ b/umami/plotting_umami.py @@ -1168,12 +1168,12 @@ def plot_saliency( eval_file_dir + f'/saliency_{eval_epoch}_{plot_config["data_set_name"]}.pkl', "rb", - ) as f: - maps_dict = pickle.load(f) + ) as pkl_file: + maps_dict = pickle.load(pkl_file) else: - with open(plot_config["evaluation_file"], "rb") as f: - maps_dict = pickle.load(f) + with open(plot_config["evaluation_file"], "rb") as pkl_file: + maps_dict = pickle.load(pkl_file) 
uet.plotSaliency( maps_dict=maps_dict, diff --git a/umami/tf_tools/load_tfrecord.py b/umami/tf_tools/load_tfrecord.py index 950fea38..e2dad653 100644 --- a/umami/tf_tools/load_tfrecord.py +++ b/umami/tf_tools/load_tfrecord.py @@ -33,7 +33,7 @@ def load_tfrecords_train_dataset( If no metadata file could be found in tfrecords directory. """ # Load NN Structure and training parameter from file - NN_structure = train_config.NN_structure + nn_structure = train_config.NN_structure tracks_name = train_config.tracks_name # Get the files in dir @@ -58,7 +58,7 @@ def load_tfrecords_train_dataset( # Check if nfiles is given. Otherwise set to 5 try: - nfiles = NN_structure["nfiles_tfrecord"] + nfiles = nn_structure["nfiles_tfrecord"] except KeyError: nfiles = 5 @@ -69,15 +69,15 @@ def load_tfrecords_train_dataset( # Get the tfrecords tfrecord_reader = TFRecordReader( path=train_config.train_file, - batch_size=NN_structure["batch_size"], + batch_size=nn_structure["batch_size"], nfiles=nfiles, - tagger_name=NN_structure["tagger"], + tagger_name=nn_structure["tagger"], tracks_name=tracks_name, - n_cond=NN_structure["N_Conditions"] if "N_Conditions" in NN_structure else None, + n_cond=nn_structure["N_Conditions"] if "N_Conditions" in nn_structure else None, ) # Load the dataset from reader - train_dataset = tfrecord_reader.load_Dataset() + train_dataset = tfrecord_reader.load_dataset() # Get the metadata name metadata_name = (train_config.train_file + "/metadata.json").replace("//", "/") @@ -135,7 +135,7 @@ class TFRecordReader: self.sample_weights = sample_weights self.n_cond = n_cond - def load_Dataset(self): + def load_dataset(self): """ Load TFRecord and create Dataset for training @@ -144,15 +144,15 @@ class TFRecordReader: tf_Dataset """ data_files = tf.io.gfile.glob((self.path + "/*.tfrecord").replace("//", "/")) - Dataset_shards = tf.data.Dataset.from_tensor_slices([data_files]) - Dataset_shards.shuffle(tf.cast(tf.shape(data_files)[0], tf.int64)) - tf_Dataset = Dataset_shards.interleave( + dataset_shards = tf.data.Dataset.from_tensor_slices([data_files]) + dataset_shards.shuffle(tf.cast(tf.shape(data_files)[0], tf.int64)) + tf_dataset = dataset_shards.interleave( tf.data.TFRecordDataset, num_parallel_calls=tf.data.AUTOTUNE, cycle_length=self.nfiles, ) - tf_Dataset = ( - tf_Dataset.shuffle(self.batch_size * 10) + tf_dataset = ( + tf_dataset.shuffle(self.batch_size * 10) .batch(self.batch_size) .map( self.decode_fn, @@ -161,7 +161,7 @@ class TFRecordReader: .repeat() .prefetch(3) ) - return tf_Dataset + return tf_dataset def decode_fn(self, record_bytes): """ -- GitLab From d7a7088344de791c412807da84d1cd2f50e97b40 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:37:21 +0200 Subject: [PATCH 07/28] change load_config_file --- umami/configuration/Configuration.py | 4 ++-- umami/preprocessing_tools/Configuration.py | 4 ++-- umami/train_tools/Configuration.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/umami/configuration/Configuration.py b/umami/configuration/Configuration.py index 070ff5b3..14341a7e 100644 --- a/umami/configuration/Configuration.py +++ b/umami/configuration/Configuration.py @@ -18,13 +18,13 @@ class Configuration: self.yaml_config = ( f"{pathlib.Path(__file__).parent.absolute()}/../configs/global_config.yaml" ) - self.LoadConfigFile() + self.load_config_file() self.logger = self.SetLoggingLevel() self.SetTFDebugLevel() self.SetMPLPlottingBackend() self.GetConfiguration() - def LoadConfigFile(self): + def load_config_file(self): """Load 
config file from disk.""" with open(self.yaml_config, "r") as conf: self.config = yaml.load(conf, Loader=yaml.FullLoader) diff --git a/umami/preprocessing_tools/Configuration.py b/umami/preprocessing_tools/Configuration.py index 021ccdea..edc163f2 100644 --- a/umami/preprocessing_tools/Configuration.py +++ b/umami/preprocessing_tools/Configuration.py @@ -25,7 +25,7 @@ class Configuration: self.YAML = YAML(typ="safe", pure=True) self.yaml_config = yaml_config self.yaml_default_config = "configs/preprocessing_default_config.yaml" - self.LoadConfigFiles() + self.load_config_files() self.GetConfiguration() self.CheckTracksNames() @@ -71,7 +71,7 @@ class Configuration: ) return preprocess_parameters_path - def LoadConfigFiles(self) -> None: + def load_config_files(self) -> None: """Load config file from disk.""" self.yaml_default_config = os.path.join( os.path.dirname(__file__), self.yaml_default_config diff --git a/umami/train_tools/Configuration.py b/umami/train_tools/Configuration.py index 6545a038..e682533c 100644 --- a/umami/train_tools/Configuration.py +++ b/umami/train_tools/Configuration.py @@ -12,10 +12,10 @@ class Configuration: def __init__(self, yaml_config=None): super().__init__() self.yaml_config = yaml_config - self.LoadConfigFile() + self.load_config_file() self.GetConfiguration() - def LoadConfigFile(self): + def load_config_file(self): """ "Load config file from disk.""" logger.info(f"Using train config file {self.yaml_config}") with open(self.yaml_config, "r") as conf: -- GitLab From 80d03e1c29c015468c1b5c46278802c85ff3628b Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:38:06 +0200 Subject: [PATCH 08/28] rename to set_logging_level --- umami/configuration/Configuration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umami/configuration/Configuration.py b/umami/configuration/Configuration.py index 14341a7e..b4a71163 100644 --- a/umami/configuration/Configuration.py +++ b/umami/configuration/Configuration.py @@ -19,7 +19,7 @@ class Configuration: f"{pathlib.Path(__file__).parent.absolute()}/../configs/global_config.yaml" ) self.load_config_file() - self.logger = self.SetLoggingLevel() + self.logger = self.set_logging_level() self.SetTFDebugLevel() self.SetMPLPlottingBackend() self.GetConfiguration() @@ -67,7 +67,7 @@ class Configuration: self.logger.debug(f"Setting TFDebugLevel to {self.config['TFDebugLevel']}") os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(self.config["TFDebugLevel"]) - def SetLoggingLevel(self) -> object: + def set_logging_level(self) -> object: """Set DebugLevel for logging. 
Returns -- GitLab From 7146941fb5ef85d44ab3149f0b3ed7b98d6ef829 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:39:27 +0200 Subject: [PATCH 09/28] rename to get_test_sample_trks --- scripts/check_lwtnn-model.py | 2 +- umami/evaluate_model.py | 2 +- umami/tests/unit/train_tools/test_NN_tools.py | 14 +- umami/train_tools/NN_tools.py | 6 +- umami/train_tools/__init__.py | 2 +- umami/train_tools/nn_tools.py | 1753 +++++++++++++++++ 6 files changed, 1766 insertions(+), 13 deletions(-) create mode 100644 umami/train_tools/nn_tools.py diff --git a/scripts/check_lwtnn-model.py b/scripts/check_lwtnn-model.py index 28b3bce9..a4379829 100644 --- a/scripts/check_lwtnn-model.py +++ b/scripts/check_lwtnn-model.py @@ -220,7 +220,7 @@ def main(): # Get prediction for dips elif "dips" in tagger.casefold(): - X_test_trk, Y_test = utt.GetTestSampleTrks( + X_test_trk, Y_test = utt.get_test_sample_trks( input_file, var_dict, preprocess_config, diff --git a/umami/evaluate_model.py b/umami/evaluate_model.py index 1f5f09a2..f8a35038 100644 --- a/umami/evaluate_model.py +++ b/umami/evaluate_model.py @@ -510,7 +510,7 @@ def EvaluateModelDips( else: # Get the testfile with the needed configs - X, Y_test = utt.GetTestSampleTrks( + X, Y_test = utt.get_test_sample_trks( input_file=test_file, var_dict=train_config.var_dict, preprocess_config=preprocess_config, diff --git a/umami/tests/unit/train_tools/test_NN_tools.py b/umami/tests/unit/train_tools/test_NN_tools.py index f78d2dfe..410c7820 100644 --- a/umami/tests/unit/train_tools/test_NN_tools.py +++ b/umami/tests/unit/train_tools/test_NN_tools.py @@ -13,7 +13,6 @@ from umami.train_tools.NN_tools import ( GetModelPath, GetTestFile, GetTestSample, - GetTestSampleTrks, MyCallback, MyCallbackUmami, create_metadata_folder, @@ -21,6 +20,7 @@ from umami.train_tools.NN_tools import ( get_jet_feature_indices, get_jet_feature_position, get_parameters_from_validation_dict_name, + get_test_sample_trks, get_unique_identifiers, get_validation_dict_name, load_validation_data_dips, @@ -449,8 +449,8 @@ class GetSamples_TestCase(unittest.TestCase): self.nTracks = 40 self.config = {"exclude": self.exclude} - def test_GetTestSampleTrks(self): - X_trk, Y_trk = GetTestSampleTrks( + def test_get_test_sample_trks(self): + X_trk, Y_trk = get_test_sample_trks( input_file=self.validation_files["ttbar_r21_val"]["path"], var_dict=self.var_dict, preprocess_config=self, @@ -465,11 +465,11 @@ class GetSamples_TestCase(unittest.TestCase): ) self.assertEqual(Y_trk.shape, (len(Y_trk), 3)) - def test_GetTestSampleTrks_Different_class_labels(self): + def test_get_test_sample_trks_Different_class_labels(self): self.class_labels_given = ["ujets", "cjets", "bjets"] with self.assertRaises(AssertionError): - _, _ = GetTestSampleTrks( + _, _ = get_test_sample_trks( input_file=self.validation_files["ttbar_r21_val"]["path"], var_dict=self.var_dict, preprocess_config=self, @@ -478,10 +478,10 @@ class GetSamples_TestCase(unittest.TestCase): nJets=self.nJets, ) - def test_GetTestSampleTrks_Extended_Labeling(self): + def test_get_test_sample_trks_Extended_Labeling(self): self.sampling = {"class_labels": ["singlebjets", "cjets", "ujets", "bbjets"]} - X_trk, Y_trk = GetTestSampleTrks( + X_trk, Y_trk = get_test_sample_trks( input_file=self.validation_files["ttbar_r21_val"]["path"], var_dict=self.var_dict, preprocess_config=self, diff --git a/umami/train_tools/NN_tools.py b/umami/train_tools/NN_tools.py index c8a0bd2f..b5028b1d 100644 --- a/umami/train_tools/NN_tools.py +++ 
b/umami/train_tools/NN_tools.py @@ -822,7 +822,7 @@ def GetTestSample( return jets, labels -def GetTestSampleTrks( +def get_test_sample_trks( input_file: str, var_dict: str, preprocess_config: object, @@ -1155,7 +1155,7 @@ def load_validation_data_dips( else None ) - (X_valid, Y_valid,) = GetTestSampleTrks( + (X_valid, Y_valid,) = get_test_sample_trks( input_file=val_file_config["path"], var_dict=train_config.var_dict, preprocess_config=preprocess_config, @@ -1231,7 +1231,7 @@ def GetTestFile( Y values ready to be used in the NN's. """ - X_trk, Y_trk = GetTestSampleTrks( + X_trk, Y_trk = get_test_sample_trks( input_file=input_file, var_dict=var_dict, preprocess_config=preprocess_config, diff --git a/umami/train_tools/__init__.py b/umami/train_tools/__init__.py index 0b796f7e..a4eab88c 100644 --- a/umami/train_tools/__init__.py +++ b/umami/train_tools/__init__.py @@ -5,7 +5,6 @@ from umami.train_tools.NN_tools import ( GetModelPath, GetTestFile, GetTestSample, - GetTestSampleTrks, MyCallback, MyCallbackUmami, calc_validation_metrics, @@ -16,6 +15,7 @@ from umami.train_tools.NN_tools import ( get_jet_feature_indices, get_jet_feature_position, get_parameters_from_validation_dict_name, + get_test_sample_trks, get_validation_dict_name, load_validation_data_dips, load_validation_data_dl1, diff --git a/umami/train_tools/nn_tools.py b/umami/train_tools/nn_tools.py new file mode 100644 index 00000000..8b13f8c2 --- /dev/null +++ b/umami/train_tools/nn_tools.py @@ -0,0 +1,1753 @@ +"""Helper functions for training tools.""" +from umami.configuration import global_config, logger # isort:skip +import json +import os +import re +from glob import glob +from pathlib import Path +from shutil import copyfile + +import numpy as np +import tensorflow as tf +from tensorflow.keras.callbacks import Callback # pylint: disable=import-error +from tensorflow.keras.models import load_model # pylint: disable=import-error +from tensorflow.keras.utils import CustomObjectScope # pylint: disable=import-error + +import umami.metrics as umt +import umami.tf_tools as utf +from umami.data_tools import LoadJetsFromFile, LoadTrksFromFile +from umami.preprocessing_tools import Configuration as Preprocess_Configuration +from umami.preprocessing_tools import ( + Gen_default_dict, + GetBinaryLabels, + GetVariableDict, + apply_scaling_trks, +) +from umami.tools import natural_keys, replaceLineInFile + + +def get_unique_identifiers(keys: list, prefix: str) -> list: + """Helper function which takes a list of strings, searches them for a given prefix + of the form "prefix_" and returns the remaining part of the matching + strings + + Parameters + ---------- + keys : list + List of strings which are searched for the given prefix + prefix : str + Prefix to search for in the provided strings + + Returns + ------- + identifiers : list + Sorted list of the unique identifiers that could be found. + """ + + identifiers = list( + {key.replace(prefix + "_", "") for key in keys if key.startswith(prefix)} + ) + + return sorted(identifiers) + + +def get_epoch_from_string(string): + """ + Get the epoch from the model file string. + + Parameters + ---------- + string : str + Path of the model file. + + Returns + ------- + epoch : int + Epoch of the model file. + """ + + epoch = re.search("model_epoch(.+?).h5", string) + return epoch.group(1) + + +def get_validation_dict_name(working_point: float, n_jets: int, dir_name: str) -> str: + """ + Get the validation dict name based on working_point, number of jets and dir_name. 
+ + Parameters + ---------- + working_point : float + Working point that was used to calculate validation dict. + n_jets : int + Number of jets that was used to calculate validation dict. + dir_name : str + Path of the directory where the validation dict is saved. + + Returns + ------- + validation_dict_path : str + Path of the validation dict. + """ + + # Get the path of the validation dict + validation_dict_path = os.path.join( + dir_name, + f"validation_working_point{str(working_point).replace('.','p')}_" + f"{int(n_jets)}jets_Dict.json", + ) + + return validation_dict_path + + +def GetModelPath(model_name: str, epoch: int) -> str: + """ + Get the path where the model will be saved/is saved. + + Parameters + ---------- + model_name : str + Name of the model that is to be saved/loaded. + epoch : int + The epoch which is to be saved/loaded + + Returns + ------- + model_path : str + Path to the model file of the specified epoch. + """ + + # Get path + model_path = f"{model_name}/model_files/model_epoch{epoch:03d}.h5" + + # Get logger output for debugging + logger.debug(f"Treating model {model_path}") + + # Return path + return model_path + + +def prepare_history_dict(hist_dict: dict) -> list: + """ + Make the history dict from keras the same shape as the one from the callbacks. + + Parameters + ---------- + hist_dict : dict + Dict with the history inside. + + Returns + ------- + history_dict_list : list + Reshaped history dict as list. Same shape as the one from the callbacks + """ + + # Init a new list + history_dict_list = [] + + # Iterate over the epochs + for epoch_counter in range(len(hist_dict["loss"])): + + # Init a temporary dict for the epoch + tmp_dict = {"epoch": epoch_counter} + + # Add the metrics from this epoch to the dict + for metric in hist_dict: + tmp_dict[metric] = float(hist_dict[metric][epoch_counter]) + + # Append dict to list + history_dict_list.append(tmp_dict) + + # Return dict + return history_dict_list + + +def get_parameters_from_validation_dict_name(dict_name: str) -> dict: + """ + Get the parameters used to calculate the validation dict from the + validation dict name. + + Parameters + ---------- + dict_name : str + Name of the validation dict. + + Returns + ------- + parameters : dict + Dict with the parameters (working_point, n_jets, dir_name) used to calculate + the validation dict. + + Raises + ------ + Exception + If the name of the validation dict could be rebuild from the + extracted parameters. + """ + + # Split the path and only get the dict name + sp = dict_name.split("/")[-1].split("_") + + # Init a new dict for the parameters + parameters = {} + + # Get the parameters from the name and add them to the dict + parameters["working_point"] = float( + sp[1].replace("working_point", "").replace("p", ".") + ) + parameters["n_jets"] = int(sp[2].replace("jets", "")) + parameters["dir_name"] = str(Path(dict_name).parent) + + # Check if the values are correct extracted. Try to build the name + # from the parameters and check if they are identical. + if get_validation_dict_name(**parameters) != dict_name: + raise Exception( + f"Can't infer parameters correctly for {dict_name}. Parameters:" + f" {parameters}" + ) + + # Return the parameters + return parameters + + +def setup_output_directory( + dir_name: str, + clean_start: bool = True, +) -> None: + """ + Check the output directory path and init/clean it. + + Parameters + ---------- + dir_name : str + Path of the output directory. + clean_start : bool + Decide, if the old model files are cleaned or not. 
+ + Raises + ------ + Exception + If the dir_name is an existing file. + """ + + outdir = Path(dir_name) + if outdir.is_dir() and clean_start: + logger.info("Removing model*.h5 and *.json files.") + for model_file in outdir.glob("model*.h5"): + model_file.unlink() + for model_file in outdir.glob("*.json"): + model_file.unlink() + elif outdir.is_dir() and not clean_start: + logger.info("Continue training. Old model files will not be erased.") + elif outdir.is_file(): + raise Exception( + f"{dir_name} is the output directory name but it already exists as a file!" + ) + else: + outdir.mkdir() + + +def create_metadata_folder( + train_config_path: str, + var_dict_path: str, + model_name: str, + preprocess_config_path: str, + model_file_path: str = None, + overwrite_config: bool = False, +) -> None: + """ + Create a metadata folder in the new model_name dir and + copy all configs there and change the paths inside the + configs to the new metadata directory path. + + Parameters + ---------- + train_config_path : str + Path to the train config that is used. + var_dict_path : str + Path to the variable dict that is used. + model_name : str + Model name that is used. + preprocess_config_path : str + Path to the preprocessing config that is used. + model_file_path : str + Path to a model to start from (the model given in model_file). + overwrite_config : bool + If configs already in metadata folder, overwrite + them or not. + """ + + # Check if model path already existing + # If not, make it + os.makedirs(os.path.join(model_name, "metadata"), exist_ok=True) + + # Create directory for models + os.makedirs(os.path.join(model_name, "model_files"), exist_ok=True) + + # Get scale dict + preprocess_config = Preprocess_Configuration(preprocess_config_path) + scale_dict_path = preprocess_config.dict_file + preprocess_parameters_path = preprocess_config.ParameterConfigPath + + # Copy files to metadata folder if not existing + for file_path in [ + train_config_path, + preprocess_config_path, + var_dict_path, + scale_dict_path, + preprocess_parameters_path, + model_file_path, + ]: + if file_path is None: + continue + if (overwrite_config is True) or not os.path.isfile( + os.path.join(model_name, "metadata", os.path.basename(file_path)) + ): + logger.info(f"Copy {file_path} to metadata folder!") + copyfile( + file_path, + os.path.join(model_name, "metadata", os.path.basename(file_path)), + ) + + # Change the paths for the preprocess config and var dict in the + # train_config + if file_path == train_config_path: + metadata_preprocess_config_path = os.path.join( + os.getcwd(), + model_name, + "metadata", + os.path.basename(preprocess_config_path), + ) + + metadata_var_dict_path = os.path.join( + os.getcwd(), + model_name, + "metadata", + os.path.basename(var_dict_path), + ) + + replaceLineInFile( + os.path.join(model_name, "metadata", os.path.basename(file_path)), + "preprocess_config:", + f"preprocess_config: {metadata_preprocess_config_path}", + ) + + replaceLineInFile( + os.path.join(model_name, "metadata", os.path.basename(file_path)), + "var_dict:", + f"var_dict: {metadata_var_dict_path}", + ) + + if model_file_path: + metadata_model_file_path = os.path.join( + os.getcwd(), + model_name, + "metadata", + os.path.basename(model_file_path), + ) + + replaceLineInFile( + os.path.join( + model_name, "metadata", os.path.basename(file_path) + ), + "model_file:", + f"model_file: {metadata_model_file_path}", + ) + + elif file_path == preprocess_parameters_path: + metadata_scale_dict_path = os.path.join( + os.getcwd(), 
+ model_name, + "metadata", + os.path.basename(scale_dict_path), + ) + + metadata_var_dict_path = os.path.join( + os.getcwd(), + model_name, + "metadata", + os.path.basename(var_dict_path), + ) + + replaceLineInFile( + os.path.join(model_name, "metadata", os.path.basename(file_path)), + ".dict_file: &dict_file", + f".dict_file: &dict_file {metadata_scale_dict_path}", + ) + + replaceLineInFile( + os.path.join(model_name, "metadata", os.path.basename(file_path)), + ".var_file: &var_file", + f".var_file: &var_file {metadata_var_dict_path}", + ) + + +class CallbackBase(Callback): + """Base class for the callbacks of the different models. + + This class provides the base functionalites for the different + callbacks needed for the models that are available. + """ + + def __init__( + self, + class_labels: list, + main_class: str, + val_data_dict: dict = None, + model_name: str = "test", + target_beff: float = 0.77, + frac_dict: dict = None, + dict_file_name: str = "DictFile.json", + clean_start: bool = True, + ): + """Init the parameters needed for the callback + + Parameters + ---------- + class_labels : list + List of class labels used in training (ORDER MATTERS!). + main_class : str + Name of the main class which is used. For b-tagging + obviously `bjets`. + val_data_dict : dict + Dict with the loaded validation data. These are loaded + using the `load_validation_data_*` functions. + model_name : str + Name of the model used to evaluate. This is important + for the path where the results are of the callback are saved. + target_beff : float + Float value between 0 and 1 for which main class efficiency + the rejections are calculated. + frac_dict : dict + Dict with the fraction values for the non-main classes. The + values need to add up to 1. + dict_file_name : str + Name of the file where the dict with the results of the callback + are saved. + clean_start : bool + Decide, if the directory where the output is saved will be cleaned + before the training starts, by default True + """ + super().__init__() + + # Add parameters to as attributes + self.class_labels = class_labels + self.main_class = main_class + self.val_data_dict = val_data_dict + self.target_beff = target_beff + self.frac_dict = ( + { + "cjets": 0.018, + "ujets": 0.982, + } + if frac_dict is None + else frac_dict + ) + self.model_name = model_name + self.dict_file_name = dict_file_name + self.clean_start = clean_start + + # Init a list for the result dicts for each epoch + self.dict_list = [] + + # Init the directory and clean it from previous training + setup_output_directory( + dir_name=self.model_name, + clean_start=self.clean_start, + ) + + +class MyCallback(CallbackBase): + """Callback class for the standard taggers + + This class is the callback for the standard taggers. Only one + output (not like the umami tagger) is given. + """ + + def on_epoch_end(self, epoch: int, logs: dict = None): + """Get the needed metrics at epoch end and calculate rest. + + This method saves the training metrics at the end of the + epoch and also calculates the validation metrics and + the rejections for each non-main class for given + efficiency and fraction values. Those are also saved. + + Parameters + ---------- + epoch : int + Number of the epoch which just finished and is now + evaluated and saved. + logs : dict + Dict with the training metrics of the just finished + epoch. 
+ """ + + # Define a dict with the epoch and the training metrics + dict_epoch = { + "epoch": epoch + 1, + "learning_rate": logs["lr"].item(), + "loss": logs["loss"], + "accuracy": logs["accuracy"], + } + + # If val data is given, calculate validaton metrics and rejections + if self.val_data_dict: + result_dict = evaluate_model( + model=self.model, + data_dict=self.val_data_dict, + class_labels=self.class_labels, + main_class=self.main_class, + target_beff=self.target_beff, + frac_dict=self.frac_dict, + ) + + # Once we use python >=3.9 + # (see https://www.python.org/dev/peps/pep-0584/#specification) + # switch to the following: dict_epoch |= result_dict + dict_epoch = {**dict_epoch, **result_dict} + + # Append the dict to the list + self.dict_list.append(dict_epoch) + + # Dump the list in json file + with open(self.dict_file_name, "w") as outfile: + json.dump(self.dict_list, outfile, indent=4) + + +class MyCallbackUmami(CallbackBase): + """Callback class for the umami tagger + + This class is the callback for the umami tagger. Due to the + two outputs of the tagger, we need special metrics etc. + """ + + def on_epoch_end(self, epoch: int, logs: dict = None): + """Get the needed metrics at epoch end and calculate rest. + + This method saves the training metrics at the end of the + epoch and also calculates the validation metrics and + the rejections for each non-main class for given + efficiency and fraction values. Those are also saved. + + Parameters + ---------- + epoch : int + Number of the epoch which just finished and is now + evaluated and saved. + logs : dict + Dict with the training metrics of the just finished + epoch. + """ + + # Define a dict with the epoch and the training metrics + dict_epoch = { + "epoch": epoch + 1, + "learning_rate": logs["lr"].item(), + "loss": logs["loss"], + "loss_dips": logs["dips_loss"], + "loss_umami": logs["umami_loss"], + "accuracy_dips": logs["dips_accuracy"], + "accuracy_umami": logs["umami_accuracy"], + } + + # If val data is given, calculate validaton metrics and rejections + if self.val_data_dict: + result_dict = evaluate_model_umami( + model=self.model, + data_dict=self.val_data_dict, + class_labels=self.class_labels, + main_class=self.main_class, + target_beff=self.target_beff, + frac_dict=self.frac_dict, + ) + + # Once we use python >=3.9 + # (see https://www.python.org/dev/peps/pep-0584/#specification) + # switch to the following: dict_epoch |= result_dict + dict_epoch = {**dict_epoch, **result_dict} + + # Append the dict to the list + self.dict_list.append(dict_epoch) + + # Dump the list in json file + with open(self.dict_file_name, "w") as outfile: + json.dump(self.dict_list, outfile, indent=4) + + +def get_jet_feature_indices(variable_header: dict, exclude: list = None): + """ + Deletes from the jet samples the keys listed in exclude. + + Parameters + ---------- + variable_header : dict + List with the variables. + exclude : list + List with the variables that are to be excluded. + + Returns + ------- + variables : list + List with the new variables without the excluded ones. + excluded_variables : list + List of the excluded variables. + excluded_var_indices : list + List of the indicies of the excluded variables. 
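Both callback classes above append one result dict per epoch and rewrite the full JSON file after every epoch, so the training history can be inspected while a training is still running. A minimal reading sketch, assuming the default dict_file_name of CallbackBase and a made-up validation-file identifier:

import json

# "DictFile.json" is the CallbackBase default; "ttbar_r21_val" is purely illustrative
with open("DictFile.json", "r") as infile:
    history = json.load(infile)

for entry in history:
    print(
        entry["epoch"],
        entry.get("loss"),
        entry.get("val_loss_ttbar_r21_val"),
    )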
+ """ + + excluded_variables = [] + all_variables = [i for j in variable_header for i in variable_header[j]] + if exclude is None: + return all_variables, excluded_variables, None + missing_header = [] + for exclude_this in exclude: + if exclude_this in variable_header: + excluded_variables.extend(variable_header[exclude_this]) + variable_header.pop(exclude_this, None) + else: + missing_header.append(exclude_this) + variables = [i for j in variable_header for i in variable_header[j]] + # If elements in exclude are not headers, check if they aren't variables + for exclude_that in missing_header: + if exclude_that in variables: + excluded_variables.append(exclude_that) + variables.remove(exclude_that) + else: + logger.warning(f"Variables to exclude not found: {exclude_that}") + # Get the index of the excluded variables for training + excluded_var_indices = [ + i for i, excl in enumerate(all_variables) if excl in excluded_variables + ] + # set to None if the list of excluded variables is empty + excluded_var_indices = ( + None if len(excluded_var_indices) == 0 else excluded_var_indices + ) + logger.debug(f"variables: {variables}") + logger.debug(f"excluded_variables: {excluded_variables}") + logger.debug(f"excluded_var_indices: {excluded_var_indices}") + return variables, excluded_variables, excluded_var_indices + + +def get_jet_feature_position( + variable_list: list, + column_names: list, +) -> list: + """ + Return the index position of the variables listed in variable_list within + the column_names list. + WARNING: should match the column order of the training data! + + Parameters + ---------- + variable_list : list + List with the variables + column_names : list + List with the names of the columns + + Returns + ------- + list + List with the positions of the columns + + Raises + ------ + ValueError + If the variable is not in the set. + """ + position_list = [] + for variable in variable_list: + try: + index_pos = column_names.index(variable) + position_list.append(index_pos) + except ValueError as no_var_err: + raise ValueError( + f"Variable {variable} to fast forward not found in set!" + ) from no_var_err + return position_list + + +def GetTestSample( + input_file: str, + var_dict: str, + preprocess_config: object, + class_labels: list, + n_jets: int = int(3e5), + exclude: list = None, + cut_vars_dict: dict = None, + jet_variables: list = None, + print_logger: bool = True, +): + """ + Load the jet variables and labels. Scale the jet variables for validation + use in the NN's. + + Parameters + ---------- + input_file : str + Path to the file which is to be loaded. + var_dict : str + Variable dict with the wanted jet variables inside. + preprocess_config : object + Loaded preprocessing config that was used. + class_labels : list + List of classes used for training of the model. + n_jets : int + Number of jets that should be loaded. + exclude : list + List of variables that are not loaded. + cut_vars_dict : dict + Dict with the cuts that should be applied. + jet_variables : list + List of variables that are used. + print_logger : bool + Decide, if the logger info is printed or not. + + Returns + ------- + jets : numpy.ndarray + X values of the jets ready to be used in the NN's. + labels : numpy.ndarray + Y values ready to be used in the NN's. + + Raises + ------ + ValueError + If jet_variables and exclude are used at the same time. + RuntimeError + If no file could be found in the given filepath. + KeyError + If variable is used which is not in the scale dict. 
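Because get_jet_feature_indices above accepts both whole header groups and single variables in exclude, a small worked example may be easier to follow than the code itself; the group and variable names are only illustrative:

variable_header = {
    "JetKinematics": ["pt_btagJes", "absEta_btagJes"],
    "JetFitter": ["JetFitter_isDefaults", "JetFitter_mass"],
}

# Excluding a whole header group removes all of its variables
variables, excluded, indices = get_jet_feature_indices(
    variable_header, exclude=["JetFitter"]
)
# variables -> ["pt_btagJes", "absEta_btagJes"]
# excluded  -> ["JetFitter_isDefaults", "JetFitter_mass"]
# indices   -> [2, 3]  (positions within the full, unexcluded variable list)
# Note that the passed dict is modified in place: the "JetFitter" key is popped.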
+ """ + + # Assert that the jet variables and exlude are not called at the same time + if jet_variables and exclude: + raise ValueError("You can't set exclude and jet_variables. Choose one!") + + # Adding class_labels check between preprocess_config and given labels + # Try/Except here for backward compatibility + try: + assert preprocess_config.sampling["class_labels"] == class_labels, ( + "class_labels from preprocessing_config and from train_config are" + " different! They need to be the same!" + ) + + except (AttributeError, KeyError): + logger.warning( + "Deprecation Warning: class_labels are given in preparation" + " and not in sampling block! Consider moving this to" + " the sampling block in your config!" + ) + assert preprocess_config.preparation["class_labels"] == class_labels, ( + "class_labels from preprocessing_config and from train_config are" + " different! They need to be the same!" + ) + + # Get the paths of the input file as list + # In case there are multiple files (Wildcard etc.) + filepaths = glob(input_file) + + # Check if filepaths is empty + if len(filepaths) == 0: + raise RuntimeError( + f""" + No file found in path {input_file}! + Check the filepath in your train_config file! + """ + ) + + # Load variables + variable_config = GetVariableDict(var_dict) + + # Load scale dict + with open(preprocess_config.dict_file, "r") as infile: + scale_dict = json.load(infile)["jets"] + + jets, Umami_labels = LoadJetsFromFile( + filepath=filepaths, + class_labels=class_labels, + n_jets=n_jets, + cut_vars_dict=cut_vars_dict, + variables=jet_variables, + print_logger=False, + ) + + # Binarize Labels + labels = GetBinaryLabels(Umami_labels) + + # Check if jet_variables is defined + if jet_variables: + # Retrieve the defined variables + variables = jet_variables + excluded_variables = [] + + else: + # Retrieve variables and the excluded variables from the config + variables, excluded_variables, _ = get_jet_feature_indices( + variable_config["train_variables"], exclude + ) + + # Select only wanted variables + jets = jets[variables] + + # Replace inf with nans + jets = jets.replace([np.inf, -np.inf], np.nan) + + logger.info("Replacing default values.") + default_dict = Gen_default_dict(scale_dict) + jets = jets.fillna(default_dict) + + logger.info("Applying scaling and shifting.") + scale_dict_variables = [] + for elem in scale_dict: + scale_dict_variables.append(elem["name"]) + if elem["name"] not in variables: + if print_logger: + if elem["name"] in excluded_variables: + logger.info( + f"{elem['name']} has been excluded from variable" + " config (is in scale dict)." + ) + else: + logger.warning( + f"{elem['name']} in scale dict but not in variable config." + ) + continue + if "isDefaults" in elem["name"]: + continue + jets[elem["name"]] -= elem["shift"] + jets[elem["name"]] /= elem["scale"] + if not set(variables).issubset(scale_dict_variables): + raise KeyError( + f"Requested {(set(variables).difference(scale_dict_variables))}" + " which are not in scale dict." + ) + + # Return jets and labels + return jets, labels + + +def get_test_sample_trks( + input_file: str, + var_dict: str, + preprocess_config: object, + class_labels: list, + tracks_name: str, + n_jets: int = int(3e5), + cut_vars_dict: dict = None, + print_logger: bool = False, +): + """ + Load the track variables and labels. Scale the track variables for validation + use in the NN's. + + Parameters + ---------- + input_file : str + Path to the file which is to be loaded. 
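The scaling loop in GetTestSample above expects the "jets" block of the scale dict to be a list of entries that each carry a name, a shift and a scale. Schematically, one entry and the transformation it triggers would look like the following sketch (the numbers are invented):

# Hypothetical scale-dict entry for one jet variable
elem = {"name": "JetFitter_mass", "shift": 6.5e3, "scale": 4.2e3}

# Every variable that is not an isDefaults flag is standardised in place
jets[elem["name"]] = (jets[elem["name"]] - elem["shift"]) / elem["scale"]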
+ var_dict : str + Variable dict with the wanted track variables inside. + preprocess_config : object + Loaded preprocessing config that was used. + class_labels : list + List of classes used for training of the model. + tracks_name : str + Name of tracks collection to use. + n_jets : int + Number of jets that should be loaded. + cut_vars_dict : dict + Dict with the cuts that should be applied. + print_logger : bool + Decide, if the logger info is printed or not. + + Returns + ------- + trks : numpy.ndarray + X values of the tracks ready to be used in the NN's. + binary_labels : numpy.ndarray + Y values ready to be used in the NN's. + + Raises + ------ + RuntimeError + If no file could be found in the given filepath. + """ + + # Adding class_labels check between preprocess_config and given labels + # Try/Except here for backward compatibility + try: + assert preprocess_config.sampling["class_labels"] == class_labels, ( + "class_labels from preprocessing_config and from train_config are" + " different! They need to be the same!" + ) + + except (AttributeError, KeyError): + logger.warning( + "Deprecation Warning: class_labels are given in preparation" + " and not in sampling block! Consider moving this to" + " the sampling block in your config!" + ) + assert preprocess_config.preparation["class_labels"] == class_labels, ( + "class_labels from preprocessing_config and from train_config are" + " different! They need to be the same!" + ) + + # making sure the n_jets aregument is an integer + n_jets = int(n_jets) + # Get the paths of the input file as list + # In case there are multiple files (Wildcard etc.) + filepaths = glob(input_file) + + # Check if filepaths is empty + if len(filepaths) == 0: + raise RuntimeError( + f""" + No file found in path {input_file}! + Check the filepath in your train_config file! + """ + ) + + # Load variables + variable_config = GetVariableDict(var_dict) + + # Load scale dict for the tracks + with open(preprocess_config.dict_file, "r") as infile: + scale_dict = json.load(infile)[f"{tracks_name}"] + + trks, labels = LoadTrksFromFile( + filepath=filepaths, + class_labels=class_labels, + n_jets=n_jets, + tracks_name=tracks_name, + cut_vars_dict=cut_vars_dict, + print_logger=print_logger, + ) + + # Binarize the labels + binary_labels = GetBinaryLabels(labels) + + # Apply scaling to the tracks + trks, _ = apply_scaling_trks( + trks=trks, + variable_config=variable_config, + scale_dict=scale_dict, + tracks_name=tracks_name, + ) + + return trks, binary_labels + + +def load_validation_data_umami( + train_config: object, + preprocess_config: object, + n_jets: int, + jets_var_list: list = None, + convert_to_tensor: bool = False, + nCond: int = None, +) -> dict: + """ + Load the validation data for UMAMI. + + Parameters + ---------- + train_config : object + Loaded train_config object. + preprocess_config : object + Loaded preprocess_config object. + n_jets : int + Number of jets to load. + jets_var_list : list + List with jet variables that are to be loaded. + convert_to_tensor : bool + Decide, if the validation data are converted to + tensorflow tensors to avoid memory leaks. + nCond: int + Number of addittional variables used for attention + + Returns + ------- + val_data_dict : dict + Dict with the validation data. 
+ """ + if jets_var_list is None: + jets_var_list = [] + # Define nn_structure and the Eval params + nn_structure = train_config.nn_structure + + # Init a new dict for the loaded val data + val_data_dict = {} + val_files = train_config.validation_files + + # Set the tracks collection name + tracks_name = train_config.tracks_name + logger.debug(f"Using tracks_name value '{tracks_name}' for validation") + + for val_file_identifier, val_file_config in val_files.items(): + logger.info(f"Loading validation file {val_file_identifier}") + # Get the cut vars dict if defined + cut_vars_dict = ( + val_file_config["variable_cuts"] + if "variable_cuts" in val_file_config + else None + ) + + # Check for excluded variables + exclude = None + if "exclude" in train_config.config: + exclude = train_config.config["exclude"] + + (x_valid, x_valid_trk, y_valid,) = GetTestFile( + input_file=val_file_config["path"], + var_dict=train_config.var_dict, + preprocess_config=preprocess_config, + class_labels=nn_structure["class_labels"], + tracks_name=tracks_name, + n_jets=n_jets, + exclude=exclude, + jet_variables=jets_var_list, + cut_vars_dict=cut_vars_dict, + ) + + if convert_to_tensor: + # Transform to tf.tensors and add to val_dict + val_data_dict[f"X_valid_{val_file_identifier}"] = tf.convert_to_tensor( + x_valid, dtype=tf.float64 + ) + val_data_dict[f"X_valid_trk_{val_file_identifier}"] = tf.convert_to_tensor( + x_valid_trk, dtype=tf.float64 + ) + val_data_dict[f"Y_valid_{val_file_identifier}"] = tf.convert_to_tensor( + y_valid, dtype=tf.int64 + ) + if nCond is not None: + val_data_dict[ + f"X_valid_addvars_{val_file_identifier}" + ] = tf.convert_to_tensor(x_valid.iloc[:, :nCond], dtype=tf.float64) + + else: + val_data_dict[f"X_valid_{val_file_identifier}"] = x_valid + val_data_dict[f"X_valid_trk_{val_file_identifier}"] = x_valid_trk + val_data_dict[f"Y_valid_{val_file_identifier}"] = y_valid + if nCond is not None: + val_data_dict[f"X_valid_addvars_{val_file_identifier}"] = x_valid.iloc[ + :, :nCond + ] + + # Return the val data dict + return val_data_dict + + +def load_validation_data_dl1( + train_config: object, + preprocess_config: object, + n_jets: int, + convert_to_tensor: bool = False, +) -> dict: + """ + Load the validation data for DL1. + + Parameters + ---------- + train_config : object + Loaded train_config object. + preprocess_config : object + Loaded preprocess_config object. + n_jets : int + Number of jets to load. + convert_to_tensor : bool + Decide, if the validation data are converted to + tensorflow tensors to avoid memory leaks. + + Returns + ------- + val_data_dict : dict + Dict with the validation data. 
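For one entry of validation_files with identifier ttbar_r21_val (a name borrowed from the unit tests further down; any identifier works), load_validation_data_umami above fills the returned dict with keys of the following form:

expected_keys = [
    "X_valid_ttbar_r21_val",          # scaled jet inputs
    "X_valid_trk_ttbar_r21_val",      # scaled track inputs
    "Y_valid_ttbar_r21_val",          # binarised truth labels
    "X_valid_addvars_ttbar_r21_val",  # only added when nCond is set (CADS)
]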
+ """ + + # Define nn_structure and the Eval params + nn_structure = train_config.nn_structure + val_data_dict = {} + val_files = train_config.validation_files + + # Ensure the n_jets is an int + n_jets = int(n_jets) + + # Check for excluded variables + exclude = None + if "exclude" in train_config.config: + exclude = train_config.config["exclude"] + + # loop over validation files and load X_valid, Y_valid for each file + for val_file_identifier, val_file_config in val_files.items(): + logger.info(f"Loading validation file {val_file_identifier}") + + cut_vars_dict = ( + val_file_config["variable_cuts"] + if "variable_cuts" in val_file_config + else None + ) + + (X_valid, Y_valid,) = GetTestSample( + input_file=val_file_config["path"], + var_dict=train_config.var_dict, + preprocess_config=preprocess_config, + class_labels=nn_structure["class_labels"], + n_jets=n_jets, + exclude=exclude, + cut_vars_dict=cut_vars_dict, + ) + + if convert_to_tensor: + # Transform to tf.tensors and add to val_dict + val_data_dict[f"X_valid_{val_file_identifier}"] = tf.convert_to_tensor( + X_valid, dtype=tf.float64 + ) + val_data_dict[f"Y_valid_{val_file_identifier}"] = tf.convert_to_tensor( + Y_valid, dtype=tf.int64 + ) + + else: + val_data_dict[f"X_valid_{val_file_identifier}"] = X_valid + val_data_dict[f"Y_valid_{val_file_identifier}"] = Y_valid + + # Return the val data dict + return val_data_dict + + +def load_validation_data_dips( + train_config: object, + preprocess_config: object, + n_jets: int, + convert_to_tensor: bool = False, +) -> dict: + """ + Load the validation data for DIPS. + + Parameters + ---------- + train_config : object + Loaded train_config object. + preprocess_config : object + Loaded preprocess_config object. + n_jets : int + Number of jets to load. + convert_to_tensor : bool + Decide, if the validation data are converted to + tensorflow tensors to avoid memory leaks. + + Returns + ------- + val_data_dict : dict + Dict with the validation data. 
+ """ + + # Define nn_structure and the Eval params + nn_structure = train_config.nn_structure + val_data_dict = {} + val_files = train_config.validation_files + + # Set the tracks collection name + tracks_name = train_config.tracks_name + logger.debug(f"Using tracks_name value '{tracks_name}' for validation") + + # loop over validation files and load X_valid, Y_valid for each file + for val_file_identifier, val_file_config in val_files.items(): + logger.info(f"Loading validation file {val_file_identifier}") + + cut_vars_dict = ( + val_file_config["variable_cuts"] + if "variable_cuts" in val_file_config + else None + ) + + (X_valid, Y_valid,) = get_test_sample_trks( + input_file=val_file_config["path"], + var_dict=train_config.var_dict, + preprocess_config=preprocess_config, + class_labels=nn_structure["class_labels"], + tracks_name=tracks_name, + n_jets=n_jets, + cut_vars_dict=cut_vars_dict, + ) + + if convert_to_tensor: + # Transform to tf.tensors and add to val_dict + val_data_dict[f"X_valid_{val_file_identifier}"] = tf.convert_to_tensor( + X_valid, dtype=tf.float64 + ) + val_data_dict[f"Y_valid_{val_file_identifier}"] = tf.convert_to_tensor( + Y_valid, dtype=tf.int64 + ) + + else: + val_data_dict[f"X_valid_{val_file_identifier}"] = X_valid + val_data_dict[f"Y_valid_{val_file_identifier}"] = Y_valid + + # Return the val data dict + return val_data_dict + + +def GetTestFile( + input_file: str, + var_dict: str, + preprocess_config: object, + class_labels: list, + tracks_name: str, + n_jets: int, + exclude: list = None, + cut_vars_dict: dict = None, + jet_variables: list = None, + print_logger: bool = True, +): + """ + Load the jet and track variables and labels. Scale the jet + and track variables for validation use in the NN's. + + Parameters + ---------- + input_file : str + Path to the file which is to be loaded. + var_dict : str + Variable dict with the wanted jet variables inside. + preprocess_config : object + Loaded preprocessing config that was used. + class_labels : list + List of classes used for training of the model. + tracks_name : str + Name of the tracks collection to use. + n_jets : int + Number of jets that should be loaded. + exclude : list + List of variables that are not loaded. + cut_vars_dict : dict + Dict with the cuts that should be applied. + jet_variables : list + List of variables that are used. + print_logger : bool + Decide, if the logger info is printed or not. + + Returns + ------- + X : numpy.ndarray + X values of the jets ready to be used in the NN's. + X_trk : numpy.ndarray + X values of the tracks ready to be used in the NN's. + Y : numpy.ndarray + Y values ready to be used in the NN's. + """ + + X_trk, Y_trk = get_test_sample_trks( + input_file=input_file, + var_dict=var_dict, + preprocess_config=preprocess_config, + class_labels=class_labels, + tracks_name=tracks_name, + n_jets=int(n_jets), + cut_vars_dict=cut_vars_dict, + print_logger=False, + ) + + X, Y = GetTestSample( + input_file=input_file, + var_dict=var_dict, + preprocess_config=preprocess_config, + class_labels=class_labels, + n_jets=int(n_jets), + exclude=exclude, + cut_vars_dict=cut_vars_dict, + jet_variables=jet_variables, + print_logger=print_logger, + ) + + assert np.equal(Y, Y_trk).all() + + return X, X_trk, Y + + +def evaluate_model_umami( + model: object, + data_dict: dict, + class_labels: list, + main_class: str, + frac_dict: dict, + target_beff: float = 0.77, +) -> dict: + """ + Evaluate the UMAMI model on the data provided. 
+ + Parameters + ---------- + model : object + Loaded UMAMI model for evaluation. + data_dict : dict + Dict with the loaded data which are to be evaluated. + class_labels : list + List of classes used for training of the model. + main_class : str + Main class which is to be tagged. + target_beff : float + Working Point which is to be used for evaluation. + frac_dict : dict + Dict with the fractions of the non-main classes. + Sum needs to be one! + + Returns + ------- + result_dict : dict + Dict with validation metrics/rejections. + """ + + validation_file_identifiers = get_unique_identifiers( + keys=list(data_dict.keys()), prefix="Y_valid" + ) + + if len(validation_file_identifiers) == 0: + logger.warning("Didn't find any validation file identifiers.") + + result_dict = {} + + # loop over validation files and load X_valid, X_valid_trk, Y_valid for each file + for val_file_identifier in validation_file_identifiers: + # Check which input data need to be used + # Calculate accuracy andloss of UMAMI and Dips part + if f"X_valid_addvars_{val_file_identifier}" in data_dict: + x = [ + data_dict[f"X_valid_trk_{val_file_identifier}"], + data_dict[f"X_valid_addvars_{val_file_identifier}"], + data_dict[f"X_valid_{val_file_identifier}"], + ] + else: + x = [ + data_dict[f"X_valid_trk_{val_file_identifier}"], + data_dict[f"X_valid_{val_file_identifier}"], + ] + (loss, dips_loss, umami_loss, dips_accuracy, umami_accuracy,) = model.evaluate( + x, + data_dict[f"Y_valid_{val_file_identifier}"], + batch_size=15_000, + use_multiprocessing=True, + workers=8, + verbose=0, + ) + + # Evaluate with the model for predictions + y_pred_dips, y_pred_umami = model.predict( + x, + batch_size=15_000, + use_multiprocessing=True, + workers=8, + verbose=0, + ) + + # Get rejections for DIPS and UMAMI + rej_dict_dips, disc_cut_dips = umt.GetRejection( + y_pred=y_pred_dips, + y_true=data_dict[f"Y_valid_{val_file_identifier}"], + class_labels=class_labels, + main_class=main_class, + frac_dict=frac_dict["dips"], + target_eff=target_beff, + unique_identifier=val_file_identifier, + subtagger="dips", + ) + rej_dict_umami, disc_cut_umami = umt.GetRejection( + y_pred=y_pred_umami, + y_true=data_dict[f"Y_valid_{val_file_identifier}"], + class_labels=class_labels, + main_class=main_class, + frac_dict=frac_dict["umami"], + target_eff=target_beff, + unique_identifier=val_file_identifier, + subtagger="umami", + ) + + # Write metrics to results dict + # TODO Change this in python 3.9 + result_dict.update( + { + f"val_loss_{val_file_identifier}": loss, + f"val_loss_dips_{val_file_identifier}": dips_loss, + f"val_loss_umami_{val_file_identifier}": umami_loss, + f"val_acc_dips_{val_file_identifier}": dips_accuracy, + f"val_acc_umami_{val_file_identifier}": umami_accuracy, + f"disc_cut_dips_{val_file_identifier}": disc_cut_dips, + f"disc_cut_umami_{val_file_identifier}": disc_cut_umami, + } + ) + + # Write rejections to the results dict + # TODO Change this in python 3.9 + result_dict.update(rej_dict_umami) + result_dict.update(rej_dict_dips) + + return result_dict + + +def evaluate_model( + model: object, + data_dict: dict, + class_labels: list, + main_class: str, + target_beff: float = 0.77, + frac_dict: dict = None, +) -> dict: + """ + Evaluate the DIPS/DL1 model on the data provided. + + Parameters + ---------- + model : object + Loaded UMAMI model for evaluation. + data_dict : dict + Dict with the loaded data which are to be evaluated. + class_labels : list + List of classes used for training of the model. 
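The GetRejection calls above reduce the per-class probabilities to background rejections at the requested main-class efficiency (target_beff); the implementation lives in umami.metrics and is not part of this patch. As a rough sketch under those assumptions, the underlying log-likelihood-ratio discriminant for the default frac_dict of CallbackBase (f_c = 0.018, f_u = 0.982) can be written as:

import numpy as np

def btag_discriminant(p_b, p_c, p_u, f_c=0.018, f_u=0.982, epsilon=1e-10):
    """Log-likelihood ratio of the main class versus the fraction-weighted background."""
    return np.log((p_b + epsilon) / (f_c * p_c + f_u * p_u + epsilon))

# The discriminant cut is chosen so that target_beff (e.g. 0.77) of the b-jets
# survive; the rejection of each background class is 1 / (its efficiency after
# that cut).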
+ main_class : str + Main class which is to be tagged. + target_beff : float + Working Point which is to be used for evaluation. + frac_dict : dict + Dict with the fractions of the non-main classes. + Sum needs to be one! + + Returns + ------- + result_dict : dict + Dict with validation metrics/rejections. + """ + + validation_file_identifiers = get_unique_identifiers( + keys=list(data_dict.keys()), prefix="Y_valid" + ) + + if len(validation_file_identifiers) == 0: + logger.warning("Didn't find any validation file identifiers.") + + result_dict = {} + # loop over validation files and load X_valid, Y_valid for each file + for val_file_identifier in validation_file_identifiers: + # Check which input data need to be used + if ( + f"X_valid_trk_{val_file_identifier}" in data_dict + and f"X_valid_{val_file_identifier}" in data_dict + ): + x = [ + data_dict[f"X_valid_trk_{val_file_identifier}"], + data_dict[f"X_valid_{val_file_identifier}"], + ] + + elif ( + f"X_valid_trk_{val_file_identifier}" in data_dict + and f"X_valid_{val_file_identifier}" not in data_dict + ): + x = data_dict[f"X_valid_trk_{val_file_identifier}"] + + else: + x = data_dict[f"X_valid_{val_file_identifier}"] + + loss, accuracy = model.evaluate( + x=x, + y=data_dict[f"Y_valid_{val_file_identifier}"], + batch_size=15_000, + use_multiprocessing=True, + workers=8, + verbose=0, + ) + + y_pred_dips = model.predict( + x=x, + batch_size=15_000, + use_multiprocessing=True, + workers=8, + verbose=0, + ) + + rej_dict, disc_cut = umt.GetRejection( + y_pred=y_pred_dips, + y_true=data_dict[f"Y_valid_{val_file_identifier}"], + unique_identifier=val_file_identifier, + class_labels=class_labels, + main_class=main_class, + frac_dict=frac_dict, + target_eff=target_beff, + ) + + # Adding the results to result_dict + result_dict.update( + { + f"val_loss_{val_file_identifier}": loss, + f"val_acc_{val_file_identifier}": accuracy, + f"disc_cut_{val_file_identifier}": disc_cut, + } + ) + + # Write the rejection values to the results dict + # TODO Change this in python 3.9 + result_dict.update( + {f"{key}": rej_dict[key] for key in rej_dict} # pylint: disable=C0206 + ) + + # Return finished dict + return result_dict + + +def calc_validation_metrics( + train_config: object, + preprocess_config: object, + tagger: str, + target_beff: float = 0.77, + n_jets: int = int(3e5), + model_string: str = "model_epoch", +) -> str: + """ + Calculates the validation metrics and rejections for each epoch + and dump it into a json. + + Parameters + ---------- + train_config : object + The loaded train config object. + preprocess_config : object + The loaded preprocess config object. + tagger : str + Name of the tagger that is used to calcualte metrics. + target_beff : float + Working point that is to be used. + n_jets : int + Number of jets to use for calculation. + model_string : str + Name of the model files. + + Returns + ------- + output_file_path + Path to the validation dict where the results are saved in. + + Raises + ------ + ValueError + If "tagger" is not dips, dl1, umami or cads. 
+ """ + + # Get evaluation parameters and NN structure from train config + Eval_parameters = train_config.Eval_parameters_validation + nn_structure = train_config.nn_structure + Second_model_string = ( + "dips_model_" if model_string == "model_epoch" else "model_epoch" + ) + + # Make a list with the model epochs saves + training_output = [ + os.path.join(f"{train_config.model_name}/model_files/", f) + for f in os.listdir(f"{train_config.model_name}/model_files/") + if model_string in f + ] + + if len(training_output) == 0: + logger.warning( + f"{model_string} models used but not found! Using {Second_model_string}" + ) + + # Set new model string + model_string = Second_model_string + + # Make a list with the model epochs saves with second model name string + training_output = [ + os.path.join(f"{train_config.model_name}/model_files/", f) + for f in os.listdir(f"{train_config.model_name}/model_files/") + if model_string in f + ] + + # Open the json file and load the training out + try: + with open( + get_validation_dict_name( + working_point=Eval_parameters["working_point"], + n_jets=Eval_parameters["n_jets"], + dir_name=train_config.model_name, + ), + "r", + ) as training_out_json: + training_output_list = json.load(training_out_json) + + except FileNotFoundError: + logger.info("No callback json file with validation metrics found! Make new one") + training_output_list = [ + {"epoch": n} for n in range(train_config.nn_structure["epochs"]) + ] + + # Init a results list + results = [] + + # TODO Change in Python 3.10 + # Check tagger and load the correct val data + if tagger.casefold() == "umami": + data_dict = load_validation_data_umami( + train_config=train_config, + preprocess_config=preprocess_config, + n_jets=n_jets, + convert_to_tensor=False, + ) + + elif tagger.casefold() == "dl1": + data_dict = load_validation_data_dl1( + train_config=train_config, + preprocess_config=preprocess_config, + n_jets=n_jets, + convert_to_tensor=False, + ) + + elif tagger.casefold() == "dips": + data_dict = load_validation_data_dips( + train_config=train_config, + preprocess_config=preprocess_config, + n_jets=n_jets, + convert_to_tensor=False, + ) + + elif tagger.casefold() == "cads": + data_dict = load_validation_data_umami( + train_config=train_config, + preprocess_config=preprocess_config, + n_jets=n_jets, + jets_var_list=[ + global_config.etavariable, + global_config.pTvariable, + ], + convert_to_tensor=False, + ) + + else: + raise ValueError(f"Tagger {tagger} is not supported!") + + # Loop over the different model savepoints at each epoch + for n, model_file in enumerate(sorted(training_output, key=natural_keys)): + logger.info(f"Working on {n+1}/{len(training_output)} input files") + + # Init results dict to save to + result_dict = {} + + # Get the epoch number from the .h5 file + try: + epoch = int( + model_file[ + model_file.rfind(f"{model_string}") + + len(f"{model_string}") : model_file.find(".h5") + ] + ) + + except ValueError as val_error: + raise ValueError( + f"Epoch could not be extracted from {model_string}!" 
+ ) from val_error + + # Load the epoch from json and add it to dict + for train_epoch in training_output_list: + if epoch == train_epoch["epoch"]: + result_dict = train_epoch + + # Ensure the epoch is in the dict + result_dict["epoch"] = epoch + + if tagger.casefold() == "umami": + # Load UMAMI model + umami = load_model(model_file, {"Sum": utf.Sum}) + + # Evaluate Umami model + val_result_dict = evaluate_model_umami( + model=umami, + data_dict=data_dict, + class_labels=nn_structure["class_labels"], + main_class=nn_structure["main_class"], + target_beff=target_beff, + frac_dict=Eval_parameters["frac_values"], + ) + + # Delete model + del umami + + elif tagger.casefold() == "dl1": + # Load DL1 model + dl1 = load_model(model_file) + + # Evaluate DL1 model + val_result_dict = evaluate_model( + model=dl1, + data_dict=data_dict, + class_labels=nn_structure["class_labels"], + main_class=nn_structure["main_class"], + target_beff=target_beff, + frac_dict=Eval_parameters["frac_values"], + ) + + # Delete model + del dl1 + + elif tagger.casefold() == "dips": + # Load DIPS model + with CustomObjectScope({"Sum": utf.Sum}): + dips = load_model(model_file) + + # Validate dips + val_result_dict = evaluate_model( + model=dips, + data_dict=data_dict, + class_labels=nn_structure["class_labels"], + main_class=nn_structure["main_class"], + target_beff=target_beff, + frac_dict=Eval_parameters["frac_values"], + ) + + # Delete model + del dips + + elif tagger.casefold() == "cads": + # Load DIPS Conditional Attention model + with CustomObjectScope( + { + "Sum": utf.Sum, + "Attention": utf.Attention, + "DeepSet": utf.DeepSet, + "AttentionPooling": utf.AttentionPooling, + "DenseNet": utf.DenseNet, + "ConditionalAttention": utf.ConditionalAttention, + "ConditionalDeepSet": utf.ConditionalDeepSet, + } + ): + cads = load_model(model_file) + + # Validate dips + val_result_dict = evaluate_model( + model=cads, + data_dict=data_dict, + class_labels=nn_structure["class_labels"], + main_class=nn_structure["main_class"], + target_beff=target_beff, + frac_dict=Eval_parameters["frac_values"], + ) + + # Delete model + del cads + + else: + raise ValueError(f"Tagger {tagger} is not supported!") + + # Save results in dict + for k, v in val_result_dict.items(): + result_dict[k] = v + + # Append results dict to list + results.append(result_dict) + + # Sort the results after epoch + results = sorted(results, key=lambda x: x["epoch"]) + + # Get validation dict name + output_file_path = get_validation_dict_name( + target_beff, n_jets, train_config.model_name + ) + + # Dump dict into json + with open(output_file_path, "w") as outfile: + json.dump(results, outfile, indent=4) + + # Return Validation dict name + return output_file_path -- GitLab From 88a25d869a443c34a76e9d27c72e391b18d11e75 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:40:16 +0200 Subject: [PATCH 10/28] rename to get_test_sample --- scripts/check_lwtnn-model.py | 2 +- umami/evaluate_model.py | 2 +- umami/tests/unit/train_tools/test_NN_tools.py | 14 +++++++------- umami/train_tools/NN_tools.py | 6 +++--- umami/train_tools/__init__.py | 2 +- umami/train_tools/nn_tools.py | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/scripts/check_lwtnn-model.py b/scripts/check_lwtnn-model.py index a4379829..b7d1a1e7 100644 --- a/scripts/check_lwtnn-model.py +++ b/scripts/check_lwtnn-model.py @@ -239,7 +239,7 @@ def main(): # Get prediction for dl1 elif "dl1" in tagger.casefold(): - X_test_jet, Y_test = utt.GetTestSample( + X_test_jet, 
Y_test = utt.get_test_sample( input_file, var_dict, preprocess_config, diff --git a/umami/evaluate_model.py b/umami/evaluate_model.py index f8a35038..7d82386b 100644 --- a/umami/evaluate_model.py +++ b/umami/evaluate_model.py @@ -788,7 +788,7 @@ def EvaluateModelDL1( exclude = train_config.config["exclude"] # Get the testfile with the needed configs - X_test, _ = utt.GetTestSample( + X_test, _ = utt.get_test_sample( input_file=test_file, var_dict=train_config.var_dict, preprocess_config=preprocess_config, diff --git a/umami/tests/unit/train_tools/test_NN_tools.py b/umami/tests/unit/train_tools/test_NN_tools.py index 410c7820..ff050da0 100644 --- a/umami/tests/unit/train_tools/test_NN_tools.py +++ b/umami/tests/unit/train_tools/test_NN_tools.py @@ -12,7 +12,6 @@ from umami.train_tools.Configuration import Configuration from umami.train_tools.NN_tools import ( GetModelPath, GetTestFile, - GetTestSample, MyCallback, MyCallbackUmami, create_metadata_folder, @@ -20,6 +19,7 @@ from umami.train_tools.NN_tools import ( get_jet_feature_indices, get_jet_feature_position, get_parameters_from_validation_dict_name, + get_test_sample, get_test_sample_trks, get_unique_identifiers, get_validation_dict_name, @@ -496,8 +496,8 @@ class GetSamples_TestCase(unittest.TestCase): ) self.assertEqual(Y_trk.shape, (len(Y_trk), 4)) - def test_GetTestSample(self): - X, Y = GetTestSample( + def test_get_test_sample(self): + X, Y = get_test_sample( input_file=self.validation_files["ttbar_r21_val"]["path"], var_dict=self.var_dict, preprocess_config=self, @@ -513,11 +513,11 @@ class GetSamples_TestCase(unittest.TestCase): ["absEta_btagJes", "JetFitter_isDefaults", "JetFitter_mass"], ) - def test_GetTestSample_Different_class_labels(self): + def test_get_test_sample_Different_class_labels(self): self.class_labels_given = ["ujets", "cjets", "bjets"] with self.assertRaises(AssertionError): - _, _ = GetTestSample( + _, _ = get_test_sample( input_file=self.validation_files["ttbar_r21_val"]["path"], var_dict=self.var_dict, preprocess_config=self, @@ -526,10 +526,10 @@ class GetSamples_TestCase(unittest.TestCase): exclude=self.exclude, ) - def test_GetTestSample_Extended_Labeling(self): + def test_get_test_sample_Extended_Labeling(self): self.sampling = {"class_labels": ["singlebjets", "cjets", "ujets", "bbjets"]} - X, Y = GetTestSample( + X, Y = get_test_sample( input_file=self.validation_files["ttbar_r21_val"]["path"], var_dict=self.var_dict, preprocess_config=self, diff --git a/umami/train_tools/NN_tools.py b/umami/train_tools/NN_tools.py index b5028b1d..25741e33 100644 --- a/umami/train_tools/NN_tools.py +++ b/umami/train_tools/NN_tools.py @@ -662,7 +662,7 @@ def get_jet_feature_position( return position_list -def GetTestSample( +def get_test_sample( input_file: str, var_dict: str, preprocess_config: object, @@ -1082,7 +1082,7 @@ def load_validation_data_dl1( else None ) - (X_valid, Y_valid,) = GetTestSample( + (X_valid, Y_valid,) = get_test_sample( input_file=val_file_config["path"], var_dict=train_config.var_dict, preprocess_config=preprocess_config, @@ -1242,7 +1242,7 @@ def GetTestFile( print_logger=False, ) - X, Y = GetTestSample( + X, Y = get_test_sample( input_file=input_file, var_dict=var_dict, preprocess_config=preprocess_config, diff --git a/umami/train_tools/__init__.py b/umami/train_tools/__init__.py index a4eab88c..fc366cfc 100644 --- a/umami/train_tools/__init__.py +++ b/umami/train_tools/__init__.py @@ -4,7 +4,6 @@ from umami.train_tools.Configuration import Configuration from umami.train_tools.NN_tools import 
( GetModelPath, GetTestFile, - GetTestSample, MyCallback, MyCallbackUmami, calc_validation_metrics, @@ -15,6 +14,7 @@ from umami.train_tools.NN_tools import ( get_jet_feature_indices, get_jet_feature_position, get_parameters_from_validation_dict_name, + get_test_sample, get_test_sample_trks, get_validation_dict_name, load_validation_data_dips, diff --git a/umami/train_tools/nn_tools.py b/umami/train_tools/nn_tools.py index 8b13f8c2..fb66b99b 100644 --- a/umami/train_tools/nn_tools.py +++ b/umami/train_tools/nn_tools.py @@ -665,7 +665,7 @@ def get_jet_feature_position( return position_list -def GetTestSample( +def get_test_sample( input_file: str, var_dict: str, preprocess_config: object, @@ -1085,7 +1085,7 @@ def load_validation_data_dl1( else None ) - (X_valid, Y_valid,) = GetTestSample( + (X_valid, Y_valid,) = get_test_sample( input_file=val_file_config["path"], var_dict=train_config.var_dict, preprocess_config=preprocess_config, @@ -1245,7 +1245,7 @@ def GetTestFile( print_logger=False, ) - X, Y = GetTestSample( + X, Y = get_test_sample( input_file=input_file, var_dict=var_dict, preprocess_config=preprocess_config, -- GitLab From 452e5f1206ed6a24c4140a24e4daacde72aa707c Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:41:55 +0200 Subject: [PATCH 11/28] rename to h5_to_tf_record_converter --- umami/preprocessing.py | 2 +- umami/tests/unit/tf_tools/test_tf_tools_ConvertToRecord.py | 6 +++--- umami/tf_tools/Convert_to_Record.py | 2 +- umami/tf_tools/__init__.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/umami/preprocessing.py b/umami/preprocessing.py index 1e022f54..0b0956f6 100644 --- a/umami/preprocessing.py +++ b/umami/preprocessing.py @@ -184,7 +184,7 @@ if __name__ == "__main__": elif args.to_records: import umami.tf_tools as utft - Converter = utft.h5toTFRecordConverter(config) + Converter = utft.h5_to_tf_record_converter(config) Converter.write_tfrecord() # Give error when nothing is used diff --git a/umami/tests/unit/tf_tools/test_tf_tools_ConvertToRecord.py b/umami/tests/unit/tf_tools/test_tf_tools_ConvertToRecord.py index cfe2065e..d3748fef 100644 --- a/umami/tests/unit/tf_tools/test_tf_tools_ConvertToRecord.py +++ b/umami/tests/unit/tf_tools/test_tf_tools_ConvertToRecord.py @@ -43,7 +43,7 @@ class ConvertTest(unittest.TestCase): ) def test_save_parameters(self): - cv = Convert_to_Record.h5toTFRecordConverter(self.config) + cv = Convert_to_Record.h5_to_tf_record_converter(self.config) # create temporary directory where data should be saved record_dir = tempfile.TemporaryDirectory() cv.save_parameters(record_dir.name) @@ -60,7 +60,7 @@ class ConvertTest(unittest.TestCase): self.assertEqual(parameters, parameters_saved) def test_save_parameters_nadd_vars(self): - cv = Convert_to_Record.h5toTFRecordConverter(self.config) + cv = Convert_to_Record.h5_to_tf_record_converter(self.config) # create temporary directory where data should be saved record_dir = tempfile.TemporaryDirectory() cv.n_add_vars = 4 @@ -79,6 +79,6 @@ class ConvertTest(unittest.TestCase): self.assertEqual(parameters, parameters_saved) def test_faulty_setup(self): - cv = Convert_to_Record.h5toTFRecordConverter(self.faulty_config) + cv = Convert_to_Record.h5_to_tf_record_converter(self.faulty_config) default_chunk_size = 5_000 self.assertEqual(cv.chunk_size, default_chunk_size) diff --git a/umami/tf_tools/Convert_to_Record.py b/umami/tf_tools/Convert_to_Record.py index f951d7f3..a6063f6a 100644 --- a/umami/tf_tools/Convert_to_Record.py +++ 
b/umami/tf_tools/Convert_to_Record.py @@ -9,7 +9,7 @@ import tensorflow as tf import tqdm -class h5toTFRecordConverter: +class h5_to_tf_record_converter: """h5 converter to tf records.""" def __init__(self, config): diff --git a/umami/tf_tools/__init__.py b/umami/tf_tools/__init__.py index 4f8ac4c1..213b4b2b 100644 --- a/umami/tf_tools/__init__.py +++ b/umami/tf_tools/__init__.py @@ -1,6 +1,6 @@ # flake8: noqa # pylint: skip-file -from umami.tf_tools.Convert_to_Record import h5toTFRecordConverter +from umami.tf_tools.Convert_to_Record import h5_to_tf_record_converter from umami.tf_tools.generators import ( cads_generator, dips_generator, -- GitLab From 89bfd897be6c22b574ec6a0e6ba7af73e28039b0 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:44:54 +0200 Subject: [PATCH 12/28] rename to get_configuration --- umami/configuration/Configuration.py | 4 ++-- umami/preprocessing_tools/Configuration.py | 4 ++-- .../tests/unit/preprocessing/test_preprocessing_tools.py | 4 ++-- umami/tests/unit/train_tools/test_NN_tools.py | 8 ++++---- umami/train_tools/Configuration.py | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/umami/configuration/Configuration.py b/umami/configuration/Configuration.py index b4a71163..dca46d5f 100644 --- a/umami/configuration/Configuration.py +++ b/umami/configuration/Configuration.py @@ -22,14 +22,14 @@ class Configuration: self.logger = self.set_logging_level() self.SetTFDebugLevel() self.SetMPLPlottingBackend() - self.GetConfiguration() + self.get_configuration() def load_config_file(self): """Load config file from disk.""" with open(self.yaml_config, "r") as conf: self.config = yaml.load(conf, Loader=yaml.FullLoader) - def GetConfiguration(self): + def get_configuration(self): """Assigne configuration from file to class variables. Raises diff --git a/umami/preprocessing_tools/Configuration.py b/umami/preprocessing_tools/Configuration.py index edc163f2..42245cbe 100644 --- a/umami/preprocessing_tools/Configuration.py +++ b/umami/preprocessing_tools/Configuration.py @@ -26,7 +26,7 @@ class Configuration: self.yaml_config = yaml_config self.yaml_default_config = "configs/preprocessing_default_config.yaml" self.load_config_files() - self.GetConfiguration() + self.get_configuration() self.CheckTracksNames() @property @@ -83,7 +83,7 @@ class Configuration: with open(self.yaml_config, "r") as conf: self.config = self.YAML.load(conf) - def GetConfiguration(self) -> None: + def get_configuration(self) -> None: """Assigne configuration from file to class variables. 
Raises diff --git a/umami/tests/unit/preprocessing/test_preprocessing_tools.py b/umami/tests/unit/preprocessing/test_preprocessing_tools.py index 69b2c0dd..2718f011 100644 --- a/umami/tests/unit/preprocessing/test_preprocessing_tools.py +++ b/umami/tests/unit/preprocessing/test_preprocessing_tools.py @@ -34,7 +34,7 @@ class ConfigurationTestCase(unittest.TestCase): config = Configuration(self.config_file) del config.config["outfile_name"] with self.assertRaises(KeyError): - config.GetConfiguration() + config.get_configuration() # this functionality is still there but is not used for now # so will keep the test here in case this is used again @@ -42,7 +42,7 @@ class ConfigurationTestCase(unittest.TestCase): # config = Configuration(self.config_file) # del config.config["outfile_name"] # with self.assertWarns(Warning): - # config.GetConfiguration() + # config.get_configuration() def test_GetFileName_no_input(self): config = Configuration(self.config_file) diff --git a/umami/tests/unit/train_tools/test_NN_tools.py b/umami/tests/unit/train_tools/test_NN_tools.py index ff050da0..34534f2e 100644 --- a/umami/tests/unit/train_tools/test_NN_tools.py +++ b/umami/tests/unit/train_tools/test_NN_tools.py @@ -224,7 +224,7 @@ class Configuration_TestCase(unittest.TestCase): config = Configuration(self.config_file) del config.config["model_name"] with self.assertRaises(KeyError): - config.GetConfiguration() + config.get_configuration() def test_double_label_value(self): config = Configuration(self.config_file) @@ -236,7 +236,7 @@ class Configuration_TestCase(unittest.TestCase): ] with self.assertRaises(ValueError): - config.GetConfiguration() + config.get_configuration() def test_double_defined_b_jets(self): config = Configuration(self.config_file) @@ -248,7 +248,7 @@ class Configuration_TestCase(unittest.TestCase): ] with self.assertRaises(ValueError): - config.GetConfiguration() + config.get_configuration() def test_double_defined_c_jets(self): config = Configuration(self.config_file) @@ -260,7 +260,7 @@ class Configuration_TestCase(unittest.TestCase): ] with self.assertRaises(ValueError): - config.GetConfiguration() + config.get_configuration() class MyCallback_TestCase(unittest.TestCase): diff --git a/umami/train_tools/Configuration.py b/umami/train_tools/Configuration.py index e682533c..0a346dd5 100644 --- a/umami/train_tools/Configuration.py +++ b/umami/train_tools/Configuration.py @@ -13,7 +13,7 @@ class Configuration: super().__init__() self.yaml_config = yaml_config self.load_config_file() - self.GetConfiguration() + self.get_configuration() def load_config_file(self): """ "Load config file from disk.""" @@ -21,7 +21,7 @@ class Configuration: with open(self.yaml_config, "r") as conf: self.config = yaml.load(conf, Loader=yaml_loader) - def GetConfiguration(self): + def get_configuration(self): """Assigne configuration from file to class variables. 
Raises -- GitLab From 2a71b0741517fb9f5f519cc7ecb192187a8c6aab Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 13:53:51 +0200 Subject: [PATCH 13/28] renaming Configuration module to configuration --- umami/configuration/__init__.py | 2 +- .../{Configuration.py => configuration.py} | 16 ++++++++-------- umami/tests/unit/helper_tools/test_tools.py | 2 +- umami/train_tools/__init__.py | 2 +- .../{Configuration.py => configuration.py} | 0 5 files changed, 11 insertions(+), 11 deletions(-) rename umami/configuration/{Configuration.py => configuration.py} (93%) rename umami/train_tools/{Configuration.py => configuration.py} (100%) diff --git a/umami/configuration/__init__.py b/umami/configuration/__init__.py index 014bd1aa..a3a677f2 100644 --- a/umami/configuration/__init__.py +++ b/umami/configuration/__init__.py @@ -1,6 +1,6 @@ # flake8: noqa # pylint: skip-file -from umami.configuration.Configuration import ( +from umami.configuration.configuration import ( Configuration, global_config, logger, diff --git a/umami/configuration/Configuration.py b/umami/configuration/configuration.py similarity index 93% rename from umami/configuration/Configuration.py rename to umami/configuration/configuration.py index dca46d5f..aa39071b 100644 --- a/umami/configuration/Configuration.py +++ b/umami/configuration/configuration.py @@ -20,8 +20,8 @@ class Configuration: ) self.load_config_file() self.logger = self.set_logging_level() - self.SetTFDebugLevel() - self.SetMPLPlottingBackend() + self.set_tf_debug_level() + self.set_mpl_plotting_backend() self.get_configuration() def load_config_file(self): @@ -53,7 +53,7 @@ class Configuration: else: raise KeyError(f"You need to specify {item} in your config file!") - def SetMPLPlottingBackend(self): + def set_mpl_plotting_backend(self): """Setting the plotting backend of matplotlib.""" self.logger.debug( f"Setting Matplotlib's backend to {self.config['MPLPlottingBackend']}" @@ -61,7 +61,7 @@ class Configuration: matplotlib.use(self.config["MPLPlottingBackend"]) - def SetTFDebugLevel(self): + def set_tf_debug_level(self): """Setting the Debug level of tensorflow. For reference see https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error""" # noqa # pylint: disable=C0301 self.logger.debug(f"Setting TFDebugLevel to {self.config['TFDebugLevel']}") @@ -92,11 +92,11 @@ class Configuration: f"The 'DebugLevel' option {self.config['DebugLevel']} set in" " the global config is not valid." 
) - ch = logging.StreamHandler() - ch.setLevel(log_levels[self.config["DebugLevel"]]) - ch.setFormatter(CustomFormatter()) + ch_handler = logging.StreamHandler() + ch_handler.setLevel(log_levels[self.config["DebugLevel"]]) + ch_handler.setFormatter(CustomFormatter()) - umami_logger.addHandler(ch) + umami_logger.addHandler(ch_handler) umami_logger.propagate = False return umami_logger diff --git a/umami/tests/unit/helper_tools/test_tools.py b/umami/tests/unit/helper_tools/test_tools.py index d3c98fa8..3c1d6112 100644 --- a/umami/tests/unit/helper_tools/test_tools.py +++ b/umami/tests/unit/helper_tools/test_tools.py @@ -17,7 +17,7 @@ from umami.tools import ( compare_leading_spaces, replaceLineInFile, ) -from umami.train_tools.Configuration import Configuration +from umami.train_tools.configuration import Configuration set_log_level(logger, "DEBUG") diff --git a/umami/train_tools/__init__.py b/umami/train_tools/__init__.py index fc366cfc..10d1817b 100644 --- a/umami/train_tools/__init__.py +++ b/umami/train_tools/__init__.py @@ -1,6 +1,6 @@ # flake8: noqa # pylint: skip-file -from umami.train_tools.Configuration import Configuration +from umami.train_tools.configuration import Configuration from umami.train_tools.NN_tools import ( GetModelPath, GetTestFile, diff --git a/umami/train_tools/Configuration.py b/umami/train_tools/configuration.py similarity index 100% rename from umami/train_tools/Configuration.py rename to umami/train_tools/configuration.py -- GitLab From 62219d9215e196bd644b866d48076816318c7cfb Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 14:18:49 +0200 Subject: [PATCH 14/28] fixing renaming errors --- umami/tools/tools.py | 2 +- umami/train_tools/nn_tools.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/umami/tools/tools.py b/umami/tools/tools.py index 11e9e79c..c36cc29c 100644 --- a/umami/tools/tools.py +++ b/umami/tools/tools.py @@ -3,7 +3,7 @@ import re import yaml -from umami.configuration.Configuration import logger +from umami.configuration.configuration import logger # adding a custom yaml loader in order to be able to have nubers with # scientific notation diff --git a/umami/train_tools/nn_tools.py b/umami/train_tools/nn_tools.py index fb66b99b..60d0ae9b 100644 --- a/umami/train_tools/nn_tools.py +++ b/umami/train_tools/nn_tools.py @@ -764,7 +764,7 @@ def get_test_sample( jets, Umami_labels = LoadJetsFromFile( filepath=filepaths, class_labels=class_labels, - n_jets=n_jets, + nJets=n_jets, cut_vars_dict=cut_vars_dict, variables=jet_variables, print_logger=False, @@ -915,7 +915,7 @@ def get_test_sample_trks( trks, labels = LoadTrksFromFile( filepath=filepaths, class_labels=class_labels, - n_jets=n_jets, + nJets=n_jets, tracks_name=tracks_name, cut_vars_dict=cut_vars_dict, print_logger=print_logger, -- GitLab From b09ca96ec452b3c9ad6806f356428a3081d82a4e Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 14:20:27 +0200 Subject: [PATCH 15/28] add changelog --- changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.md b/changelog.md index a868bc03..b908fffc 100644 --- a/changelog.md +++ b/changelog.md @@ -4,6 +4,7 @@ ### Latest +- Fixing bunch of invalid-name pylint errors [!522](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/522) - Adding error message if file in placeholder does not exist [!519](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/519) - Update the LWTNN scripts 
[!512](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/512) - Adding pydash to requirements [!517](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/517) -- GitLab From 9e775a11b612108825661a81b4fefeb67639a280 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 14:35:52 +0200 Subject: [PATCH 16/28] snall fix --- umami/tests/unit/train_tools/test_NN_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umami/tests/unit/train_tools/test_NN_tools.py b/umami/tests/unit/train_tools/test_NN_tools.py index 34534f2e..7b996b41 100644 --- a/umami/tests/unit/train_tools/test_NN_tools.py +++ b/umami/tests/unit/train_tools/test_NN_tools.py @@ -8,7 +8,7 @@ import numpy as np from umami.configuration import logger, set_log_level from umami.tools import replaceLineInFile -from umami.train_tools.Configuration import Configuration +from umami.train_tools.configuration import Configuration from umami.train_tools.NN_tools import ( GetModelPath, GetTestFile, -- GitLab From 24630a1628fa5bfff5fca05739d304a9bf79f323 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 15:05:58 +0200 Subject: [PATCH 17/28] small naming fixes --- umami/preprocessing_tools/Scaling.py | 10 +- .../preprocessing_tools/Writing_Train_File.py | 4 +- umami/train_tools/NN_tools.py | 4 +- umami/train_tools/nn_tools.py | 1753 ----------------- 4 files changed, 9 insertions(+), 1762 deletions(-) delete mode 100644 umami/train_tools/nn_tools.py diff --git a/umami/preprocessing_tools/Scaling.py b/umami/preprocessing_tools/Scaling.py index 447c670b..2055d1c2 100644 --- a/umami/preprocessing_tools/Scaling.py +++ b/umami/preprocessing_tools/Scaling.py @@ -487,7 +487,7 @@ class Scaling: ---------- input_file : str, optional File which is used to calculate scaling/shifting, by default None - chunk_size : int, optional + chunk_size : int, optional Scale dict calculated using the given file, by default 1e5 """ @@ -609,7 +609,7 @@ class Scaling: File which is to be scaled. nJets : int Number of jets which are to be scaled. - chunk_size : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default int(10000) @@ -712,7 +712,7 @@ class Scaling: Number of jets which are to be scaled. tracks_name : str Name of the tracks - chunk_size : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default int(10000) @@ -817,7 +817,7 @@ class Scaling: Number of jets which are to be scaled. tracks_scale_dict : dict, optional Scale dict of the track variables., by default None - chunk_size : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default int(10000) @@ -919,7 +919,7 @@ class Scaling: ---------- input_file : str, optional File which is to be scaled., by default None - chunk_size : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default 1e6 """ diff --git a/umami/preprocessing_tools/Writing_Train_File.py b/umami/preprocessing_tools/Writing_Train_File.py index ab837048..e9233417 100644 --- a/umami/preprocessing_tools/Writing_Train_File.py +++ b/umami/preprocessing_tools/Writing_Train_File.py @@ -58,7 +58,7 @@ class TrainSampleWriter: List with the indicies. nJets : int Number of jets used. 
- chunk_size : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default 100_000 @@ -194,7 +194,7 @@ class TrainSampleWriter: output_file : str, optional Name of the output file. Default is name from config + resampled_scaled_shuffled., by default None - chunk_size : int, optional + chunk_size : int, optional The number of jets which are loaded and scaled/shifted per step, by default 100_000 """ diff --git a/umami/train_tools/NN_tools.py b/umami/train_tools/NN_tools.py index 25741e33..e4133c11 100644 --- a/umami/train_tools/NN_tools.py +++ b/umami/train_tools/NN_tools.py @@ -66,8 +66,8 @@ def get_epoch_from_string(string): Epoch of the model file. """ - m = re.search("model_epoch(.+?).h5", string) - return m.group(1) + epoch = re.search("model_epoch(.+?).h5", string) + return epoch.group(1) def get_validation_dict_name(WP: float, n_jets: int, dir_name: str) -> str: diff --git a/umami/train_tools/nn_tools.py b/umami/train_tools/nn_tools.py deleted file mode 100644 index 60d0ae9b..00000000 --- a/umami/train_tools/nn_tools.py +++ /dev/null @@ -1,1753 +0,0 @@ -"""Helper functions for training tools.""" -from umami.configuration import global_config, logger # isort:skip -import json -import os -import re -from glob import glob -from pathlib import Path -from shutil import copyfile - -import numpy as np -import tensorflow as tf -from tensorflow.keras.callbacks import Callback # pylint: disable=import-error -from tensorflow.keras.models import load_model # pylint: disable=import-error -from tensorflow.keras.utils import CustomObjectScope # pylint: disable=import-error - -import umami.metrics as umt -import umami.tf_tools as utf -from umami.data_tools import LoadJetsFromFile, LoadTrksFromFile -from umami.preprocessing_tools import Configuration as Preprocess_Configuration -from umami.preprocessing_tools import ( - Gen_default_dict, - GetBinaryLabels, - GetVariableDict, - apply_scaling_trks, -) -from umami.tools import natural_keys, replaceLineInFile - - -def get_unique_identifiers(keys: list, prefix: str) -> list: - """Helper function which takes a list of strings, searches them for a given prefix - of the form "prefix_" and returns the remaining part of the matching - strings - - Parameters - ---------- - keys : list - List of strings which are searched for the given prefix - prefix : str - Prefix to search for in the provided strings - - Returns - ------- - identifiers : list - Sorted list of the unique identifiers that could be found. - """ - - identifiers = list( - {key.replace(prefix + "_", "") for key in keys if key.startswith(prefix)} - ) - - return sorted(identifiers) - - -def get_epoch_from_string(string): - """ - Get the epoch from the model file string. - - Parameters - ---------- - string : str - Path of the model file. - - Returns - ------- - epoch : int - Epoch of the model file. - """ - - epoch = re.search("model_epoch(.+?).h5", string) - return epoch.group(1) - - -def get_validation_dict_name(working_point: float, n_jets: int, dir_name: str) -> str: - """ - Get the validation dict name based on working_point, number of jets and dir_name. - - Parameters - ---------- - working_point : float - Working point that was used to calculate validation dict. - n_jets : int - Number of jets that was used to calculate validation dict. - dir_name : str - Path of the directory where the validation dict is saved. - - Returns - ------- - validation_dict_path : str - Path of the validation dict. 
- """ - - # Get the path of the validation dict - validation_dict_path = os.path.join( - dir_name, - f"validation_working_point{str(working_point).replace('.','p')}_" - f"{int(n_jets)}jets_Dict.json", - ) - - return validation_dict_path - - -def GetModelPath(model_name: str, epoch: int) -> str: - """ - Get the path where the model will be saved/is saved. - - Parameters - ---------- - model_name : str - Name of the model that is to be saved/loaded. - epoch : int - The epoch which is to be saved/loaded - - Returns - ------- - model_path : str - Path to the model file of the specified epoch. - """ - - # Get path - model_path = f"{model_name}/model_files/model_epoch{epoch:03d}.h5" - - # Get logger output for debugging - logger.debug(f"Treating model {model_path}") - - # Return path - return model_path - - -def prepare_history_dict(hist_dict: dict) -> list: - """ - Make the history dict from keras the same shape as the one from the callbacks. - - Parameters - ---------- - hist_dict : dict - Dict with the history inside. - - Returns - ------- - history_dict_list : list - Reshaped history dict as list. Same shape as the one from the callbacks - """ - - # Init a new list - history_dict_list = [] - - # Iterate over the epochs - for epoch_counter in range(len(hist_dict["loss"])): - - # Init a temporary dict for the epoch - tmp_dict = {"epoch": epoch_counter} - - # Add the metrics from this epoch to the dict - for metric in hist_dict: - tmp_dict[metric] = float(hist_dict[metric][epoch_counter]) - - # Append dict to list - history_dict_list.append(tmp_dict) - - # Return dict - return history_dict_list - - -def get_parameters_from_validation_dict_name(dict_name: str) -> dict: - """ - Get the parameters used to calculate the validation dict from the - validation dict name. - - Parameters - ---------- - dict_name : str - Name of the validation dict. - - Returns - ------- - parameters : dict - Dict with the parameters (working_point, n_jets, dir_name) used to calculate - the validation dict. - - Raises - ------ - Exception - If the name of the validation dict could be rebuild from the - extracted parameters. - """ - - # Split the path and only get the dict name - sp = dict_name.split("/")[-1].split("_") - - # Init a new dict for the parameters - parameters = {} - - # Get the parameters from the name and add them to the dict - parameters["working_point"] = float( - sp[1].replace("working_point", "").replace("p", ".") - ) - parameters["n_jets"] = int(sp[2].replace("jets", "")) - parameters["dir_name"] = str(Path(dict_name).parent) - - # Check if the values are correct extracted. Try to build the name - # from the parameters and check if they are identical. - if get_validation_dict_name(**parameters) != dict_name: - raise Exception( - f"Can't infer parameters correctly for {dict_name}. Parameters:" - f" {parameters}" - ) - - # Return the parameters - return parameters - - -def setup_output_directory( - dir_name: str, - clean_start: bool = True, -) -> None: - """ - Check the output directory path and init/clean it. - - Parameters - ---------- - dir_name : str - Path of the output directory. - clean_start : bool - Decide, if the old model files are cleaned or not. - - Raises - ------ - Exception - If the dir_name is an existing file. 
- """ - - outdir = Path(dir_name) - if outdir.is_dir() and clean_start: - logger.info("Removing model*.h5 and *.json files.") - for model_file in outdir.glob("model*.h5"): - model_file.unlink() - for model_file in outdir.glob("*.json"): - model_file.unlink() - elif outdir.is_dir() and not clean_start: - logger.info("Continue training. Old model files will not be erased.") - elif outdir.is_file(): - raise Exception( - f"{dir_name} is the output directory name but it already exists as a file!" - ) - else: - outdir.mkdir() - - -def create_metadata_folder( - train_config_path: str, - var_dict_path: str, - model_name: str, - preprocess_config_path: str, - model_file_path: str = None, - overwrite_config: bool = False, -) -> None: - """ - Create a metadata folder in the new model_name dir and - copy all configs there and change the paths inside the - configs to the new metadata directory path. - - Parameters - ---------- - train_config_path : str - Path to the train config that is used. - var_dict_path : str - Path to the variable dict that is used. - model_name : str - Model name that is used. - preprocess_config_path : str - Path to the preprocessing config that is used. - model_file_path : str - Path to a model to start from (the model given in model_file). - overwrite_config : bool - If configs already in metadata folder, overwrite - them or not. - """ - - # Check if model path already existing - # If not, make it - os.makedirs(os.path.join(model_name, "metadata"), exist_ok=True) - - # Create directory for models - os.makedirs(os.path.join(model_name, "model_files"), exist_ok=True) - - # Get scale dict - preprocess_config = Preprocess_Configuration(preprocess_config_path) - scale_dict_path = preprocess_config.dict_file - preprocess_parameters_path = preprocess_config.ParameterConfigPath - - # Copy files to metadata folder if not existing - for file_path in [ - train_config_path, - preprocess_config_path, - var_dict_path, - scale_dict_path, - preprocess_parameters_path, - model_file_path, - ]: - if file_path is None: - continue - if (overwrite_config is True) or not os.path.isfile( - os.path.join(model_name, "metadata", os.path.basename(file_path)) - ): - logger.info(f"Copy {file_path} to metadata folder!") - copyfile( - file_path, - os.path.join(model_name, "metadata", os.path.basename(file_path)), - ) - - # Change the paths for the preprocess config and var dict in the - # train_config - if file_path == train_config_path: - metadata_preprocess_config_path = os.path.join( - os.getcwd(), - model_name, - "metadata", - os.path.basename(preprocess_config_path), - ) - - metadata_var_dict_path = os.path.join( - os.getcwd(), - model_name, - "metadata", - os.path.basename(var_dict_path), - ) - - replaceLineInFile( - os.path.join(model_name, "metadata", os.path.basename(file_path)), - "preprocess_config:", - f"preprocess_config: {metadata_preprocess_config_path}", - ) - - replaceLineInFile( - os.path.join(model_name, "metadata", os.path.basename(file_path)), - "var_dict:", - f"var_dict: {metadata_var_dict_path}", - ) - - if model_file_path: - metadata_model_file_path = os.path.join( - os.getcwd(), - model_name, - "metadata", - os.path.basename(model_file_path), - ) - - replaceLineInFile( - os.path.join( - model_name, "metadata", os.path.basename(file_path) - ), - "model_file:", - f"model_file: {metadata_model_file_path}", - ) - - elif file_path == preprocess_parameters_path: - metadata_scale_dict_path = os.path.join( - os.getcwd(), - model_name, - "metadata", - os.path.basename(scale_dict_path), - ) - 
- metadata_var_dict_path = os.path.join( - os.getcwd(), - model_name, - "metadata", - os.path.basename(var_dict_path), - ) - - replaceLineInFile( - os.path.join(model_name, "metadata", os.path.basename(file_path)), - ".dict_file: &dict_file", - f".dict_file: &dict_file {metadata_scale_dict_path}", - ) - - replaceLineInFile( - os.path.join(model_name, "metadata", os.path.basename(file_path)), - ".var_file: &var_file", - f".var_file: &var_file {metadata_var_dict_path}", - ) - - -class CallbackBase(Callback): - """Base class for the callbacks of the different models. - - This class provides the base functionalites for the different - callbacks needed for the models that are available. - """ - - def __init__( - self, - class_labels: list, - main_class: str, - val_data_dict: dict = None, - model_name: str = "test", - target_beff: float = 0.77, - frac_dict: dict = None, - dict_file_name: str = "DictFile.json", - clean_start: bool = True, - ): - """Init the parameters needed for the callback - - Parameters - ---------- - class_labels : list - List of class labels used in training (ORDER MATTERS!). - main_class : str - Name of the main class which is used. For b-tagging - obviously `bjets`. - val_data_dict : dict - Dict with the loaded validation data. These are loaded - using the `load_validation_data_*` functions. - model_name : str - Name of the model used to evaluate. This is important - for the path where the results are of the callback are saved. - target_beff : float - Float value between 0 and 1 for which main class efficiency - the rejections are calculated. - frac_dict : dict - Dict with the fraction values for the non-main classes. The - values need to add up to 1. - dict_file_name : str - Name of the file where the dict with the results of the callback - are saved. - clean_start : bool - Decide, if the directory where the output is saved will be cleaned - before the training starts, by default True - """ - super().__init__() - - # Add parameters to as attributes - self.class_labels = class_labels - self.main_class = main_class - self.val_data_dict = val_data_dict - self.target_beff = target_beff - self.frac_dict = ( - { - "cjets": 0.018, - "ujets": 0.982, - } - if frac_dict is None - else frac_dict - ) - self.model_name = model_name - self.dict_file_name = dict_file_name - self.clean_start = clean_start - - # Init a list for the result dicts for each epoch - self.dict_list = [] - - # Init the directory and clean it from previous training - setup_output_directory( - dir_name=self.model_name, - clean_start=self.clean_start, - ) - - -class MyCallback(CallbackBase): - """Callback class for the standard taggers - - This class is the callback for the standard taggers. Only one - output (not like the umami tagger) is given. - """ - - def on_epoch_end(self, epoch: int, logs: dict = None): - """Get the needed metrics at epoch end and calculate rest. - - This method saves the training metrics at the end of the - epoch and also calculates the validation metrics and - the rejections for each non-main class for given - efficiency and fraction values. Those are also saved. - - Parameters - ---------- - epoch : int - Number of the epoch which just finished and is now - evaluated and saved. - logs : dict - Dict with the training metrics of the just finished - epoch. 
- """ - - # Define a dict with the epoch and the training metrics - dict_epoch = { - "epoch": epoch + 1, - "learning_rate": logs["lr"].item(), - "loss": logs["loss"], - "accuracy": logs["accuracy"], - } - - # If val data is given, calculate validaton metrics and rejections - if self.val_data_dict: - result_dict = evaluate_model( - model=self.model, - data_dict=self.val_data_dict, - class_labels=self.class_labels, - main_class=self.main_class, - target_beff=self.target_beff, - frac_dict=self.frac_dict, - ) - - # Once we use python >=3.9 - # (see https://www.python.org/dev/peps/pep-0584/#specification) - # switch to the following: dict_epoch |= result_dict - dict_epoch = {**dict_epoch, **result_dict} - - # Append the dict to the list - self.dict_list.append(dict_epoch) - - # Dump the list in json file - with open(self.dict_file_name, "w") as outfile: - json.dump(self.dict_list, outfile, indent=4) - - -class MyCallbackUmami(CallbackBase): - """Callback class for the umami tagger - - This class is the callback for the umami tagger. Due to the - two outputs of the tagger, we need special metrics etc. - """ - - def on_epoch_end(self, epoch: int, logs: dict = None): - """Get the needed metrics at epoch end and calculate rest. - - This method saves the training metrics at the end of the - epoch and also calculates the validation metrics and - the rejections for each non-main class for given - efficiency and fraction values. Those are also saved. - - Parameters - ---------- - epoch : int - Number of the epoch which just finished and is now - evaluated and saved. - logs : dict - Dict with the training metrics of the just finished - epoch. - """ - - # Define a dict with the epoch and the training metrics - dict_epoch = { - "epoch": epoch + 1, - "learning_rate": logs["lr"].item(), - "loss": logs["loss"], - "loss_dips": logs["dips_loss"], - "loss_umami": logs["umami_loss"], - "accuracy_dips": logs["dips_accuracy"], - "accuracy_umami": logs["umami_accuracy"], - } - - # If val data is given, calculate validaton metrics and rejections - if self.val_data_dict: - result_dict = evaluate_model_umami( - model=self.model, - data_dict=self.val_data_dict, - class_labels=self.class_labels, - main_class=self.main_class, - target_beff=self.target_beff, - frac_dict=self.frac_dict, - ) - - # Once we use python >=3.9 - # (see https://www.python.org/dev/peps/pep-0584/#specification) - # switch to the following: dict_epoch |= result_dict - dict_epoch = {**dict_epoch, **result_dict} - - # Append the dict to the list - self.dict_list.append(dict_epoch) - - # Dump the list in json file - with open(self.dict_file_name, "w") as outfile: - json.dump(self.dict_list, outfile, indent=4) - - -def get_jet_feature_indices(variable_header: dict, exclude: list = None): - """ - Deletes from the jet samples the keys listed in exclude. - - Parameters - ---------- - variable_header : dict - List with the variables. - exclude : list - List with the variables that are to be excluded. - - Returns - ------- - variables : list - List with the new variables without the excluded ones. - excluded_variables : list - List of the excluded variables. - excluded_var_indices : list - List of the indicies of the excluded variables. 
- """ - - excluded_variables = [] - all_variables = [i for j in variable_header for i in variable_header[j]] - if exclude is None: - return all_variables, excluded_variables, None - missing_header = [] - for exclude_this in exclude: - if exclude_this in variable_header: - excluded_variables.extend(variable_header[exclude_this]) - variable_header.pop(exclude_this, None) - else: - missing_header.append(exclude_this) - variables = [i for j in variable_header for i in variable_header[j]] - # If elements in exclude are not headers, check if they aren't variables - for exclude_that in missing_header: - if exclude_that in variables: - excluded_variables.append(exclude_that) - variables.remove(exclude_that) - else: - logger.warning(f"Variables to exclude not found: {exclude_that}") - # Get the index of the excluded variables for training - excluded_var_indices = [ - i for i, excl in enumerate(all_variables) if excl in excluded_variables - ] - # set to None if the list of excluded variables is empty - excluded_var_indices = ( - None if len(excluded_var_indices) == 0 else excluded_var_indices - ) - logger.debug(f"variables: {variables}") - logger.debug(f"excluded_variables: {excluded_variables}") - logger.debug(f"excluded_var_indices: {excluded_var_indices}") - return variables, excluded_variables, excluded_var_indices - - -def get_jet_feature_position( - variable_list: list, - column_names: list, -) -> list: - """ - Return the index position of the variables listed in variable_list within - the column_names list. - WARNING: should match the column order of the training data! - - Parameters - ---------- - variable_list : list - List with the variables - column_names : list - List with the names of the columns - - Returns - ------- - list - List with the positions of the columns - - Raises - ------ - ValueError - If the variable is not in the set. - """ - position_list = [] - for variable in variable_list: - try: - index_pos = column_names.index(variable) - position_list.append(index_pos) - except ValueError as no_var_err: - raise ValueError( - f"Variable {variable} to fast forward not found in set!" - ) from no_var_err - return position_list - - -def get_test_sample( - input_file: str, - var_dict: str, - preprocess_config: object, - class_labels: list, - n_jets: int = int(3e5), - exclude: list = None, - cut_vars_dict: dict = None, - jet_variables: list = None, - print_logger: bool = True, -): - """ - Load the jet variables and labels. Scale the jet variables for validation - use in the NN's. - - Parameters - ---------- - input_file : str - Path to the file which is to be loaded. - var_dict : str - Variable dict with the wanted jet variables inside. - preprocess_config : object - Loaded preprocessing config that was used. - class_labels : list - List of classes used for training of the model. - n_jets : int - Number of jets that should be loaded. - exclude : list - List of variables that are not loaded. - cut_vars_dict : dict - Dict with the cuts that should be applied. - jet_variables : list - List of variables that are used. - print_logger : bool - Decide, if the logger info is printed or not. - - Returns - ------- - jets : numpy.ndarray - X values of the jets ready to be used in the NN's. - labels : numpy.ndarray - Y values ready to be used in the NN's. - - Raises - ------ - ValueError - If jet_variables and exclude are used at the same time. - RuntimeError - If no file could be found in the given filepath. - KeyError - If variable is used which is not in the scale dict. 
- """ - - # Assert that the jet variables and exlude are not called at the same time - if jet_variables and exclude: - raise ValueError("You can't set exclude and jet_variables. Choose one!") - - # Adding class_labels check between preprocess_config and given labels - # Try/Except here for backward compatibility - try: - assert preprocess_config.sampling["class_labels"] == class_labels, ( - "class_labels from preprocessing_config and from train_config are" - " different! They need to be the same!" - ) - - except (AttributeError, KeyError): - logger.warning( - "Deprecation Warning: class_labels are given in preparation" - " and not in sampling block! Consider moving this to" - " the sampling block in your config!" - ) - assert preprocess_config.preparation["class_labels"] == class_labels, ( - "class_labels from preprocessing_config and from train_config are" - " different! They need to be the same!" - ) - - # Get the paths of the input file as list - # In case there are multiple files (Wildcard etc.) - filepaths = glob(input_file) - - # Check if filepaths is empty - if len(filepaths) == 0: - raise RuntimeError( - f""" - No file found in path {input_file}! - Check the filepath in your train_config file! - """ - ) - - # Load variables - variable_config = GetVariableDict(var_dict) - - # Load scale dict - with open(preprocess_config.dict_file, "r") as infile: - scale_dict = json.load(infile)["jets"] - - jets, Umami_labels = LoadJetsFromFile( - filepath=filepaths, - class_labels=class_labels, - nJets=n_jets, - cut_vars_dict=cut_vars_dict, - variables=jet_variables, - print_logger=False, - ) - - # Binarize Labels - labels = GetBinaryLabels(Umami_labels) - - # Check if jet_variables is defined - if jet_variables: - # Retrieve the defined variables - variables = jet_variables - excluded_variables = [] - - else: - # Retrieve variables and the excluded variables from the config - variables, excluded_variables, _ = get_jet_feature_indices( - variable_config["train_variables"], exclude - ) - - # Select only wanted variables - jets = jets[variables] - - # Replace inf with nans - jets = jets.replace([np.inf, -np.inf], np.nan) - - logger.info("Replacing default values.") - default_dict = Gen_default_dict(scale_dict) - jets = jets.fillna(default_dict) - - logger.info("Applying scaling and shifting.") - scale_dict_variables = [] - for elem in scale_dict: - scale_dict_variables.append(elem["name"]) - if elem["name"] not in variables: - if print_logger: - if elem["name"] in excluded_variables: - logger.info( - f"{elem['name']} has been excluded from variable" - " config (is in scale dict)." - ) - else: - logger.warning( - f"{elem['name']} in scale dict but not in variable config." - ) - continue - if "isDefaults" in elem["name"]: - continue - jets[elem["name"]] -= elem["shift"] - jets[elem["name"]] /= elem["scale"] - if not set(variables).issubset(scale_dict_variables): - raise KeyError( - f"Requested {(set(variables).difference(scale_dict_variables))}" - " which are not in scale dict." - ) - - # Return jets and labels - return jets, labels - - -def get_test_sample_trks( - input_file: str, - var_dict: str, - preprocess_config: object, - class_labels: list, - tracks_name: str, - n_jets: int = int(3e5), - cut_vars_dict: dict = None, - print_logger: bool = False, -): - """ - Load the track variables and labels. Scale the track variables for validation - use in the NN's. - - Parameters - ---------- - input_file : str - Path to the file which is to be loaded. 
- var_dict : str - Variable dict with the wanted track variables inside. - preprocess_config : object - Loaded preprocessing config that was used. - class_labels : list - List of classes used for training of the model. - tracks_name : str - Name of tracks collection to use. - n_jets : int - Number of jets that should be loaded. - cut_vars_dict : dict - Dict with the cuts that should be applied. - print_logger : bool - Decide, if the logger info is printed or not. - - Returns - ------- - trks : numpy.ndarray - X values of the tracks ready to be used in the NN's. - binary_labels : numpy.ndarray - Y values ready to be used in the NN's. - - Raises - ------ - RuntimeError - If no file could be found in the given filepath. - """ - - # Adding class_labels check between preprocess_config and given labels - # Try/Except here for backward compatibility - try: - assert preprocess_config.sampling["class_labels"] == class_labels, ( - "class_labels from preprocessing_config and from train_config are" - " different! They need to be the same!" - ) - - except (AttributeError, KeyError): - logger.warning( - "Deprecation Warning: class_labels are given in preparation" - " and not in sampling block! Consider moving this to" - " the sampling block in your config!" - ) - assert preprocess_config.preparation["class_labels"] == class_labels, ( - "class_labels from preprocessing_config and from train_config are" - " different! They need to be the same!" - ) - - # making sure the n_jets aregument is an integer - n_jets = int(n_jets) - # Get the paths of the input file as list - # In case there are multiple files (Wildcard etc.) - filepaths = glob(input_file) - - # Check if filepaths is empty - if len(filepaths) == 0: - raise RuntimeError( - f""" - No file found in path {input_file}! - Check the filepath in your train_config file! - """ - ) - - # Load variables - variable_config = GetVariableDict(var_dict) - - # Load scale dict for the tracks - with open(preprocess_config.dict_file, "r") as infile: - scale_dict = json.load(infile)[f"{tracks_name}"] - - trks, labels = LoadTrksFromFile( - filepath=filepaths, - class_labels=class_labels, - nJets=n_jets, - tracks_name=tracks_name, - cut_vars_dict=cut_vars_dict, - print_logger=print_logger, - ) - - # Binarize the labels - binary_labels = GetBinaryLabels(labels) - - # Apply scaling to the tracks - trks, _ = apply_scaling_trks( - trks=trks, - variable_config=variable_config, - scale_dict=scale_dict, - tracks_name=tracks_name, - ) - - return trks, binary_labels - - -def load_validation_data_umami( - train_config: object, - preprocess_config: object, - n_jets: int, - jets_var_list: list = None, - convert_to_tensor: bool = False, - nCond: int = None, -) -> dict: - """ - Load the validation data for UMAMI. - - Parameters - ---------- - train_config : object - Loaded train_config object. - preprocess_config : object - Loaded preprocess_config object. - n_jets : int - Number of jets to load. - jets_var_list : list - List with jet variables that are to be loaded. - convert_to_tensor : bool - Decide, if the validation data are converted to - tensorflow tensors to avoid memory leaks. - nCond: int - Number of addittional variables used for attention - - Returns - ------- - val_data_dict : dict - Dict with the validation data. 
- """ - if jets_var_list is None: - jets_var_list = [] - # Define nn_structure and the Eval params - nn_structure = train_config.nn_structure - - # Init a new dict for the loaded val data - val_data_dict = {} - val_files = train_config.validation_files - - # Set the tracks collection name - tracks_name = train_config.tracks_name - logger.debug(f"Using tracks_name value '{tracks_name}' for validation") - - for val_file_identifier, val_file_config in val_files.items(): - logger.info(f"Loading validation file {val_file_identifier}") - # Get the cut vars dict if defined - cut_vars_dict = ( - val_file_config["variable_cuts"] - if "variable_cuts" in val_file_config - else None - ) - - # Check for excluded variables - exclude = None - if "exclude" in train_config.config: - exclude = train_config.config["exclude"] - - (x_valid, x_valid_trk, y_valid,) = GetTestFile( - input_file=val_file_config["path"], - var_dict=train_config.var_dict, - preprocess_config=preprocess_config, - class_labels=nn_structure["class_labels"], - tracks_name=tracks_name, - n_jets=n_jets, - exclude=exclude, - jet_variables=jets_var_list, - cut_vars_dict=cut_vars_dict, - ) - - if convert_to_tensor: - # Transform to tf.tensors and add to val_dict - val_data_dict[f"X_valid_{val_file_identifier}"] = tf.convert_to_tensor( - x_valid, dtype=tf.float64 - ) - val_data_dict[f"X_valid_trk_{val_file_identifier}"] = tf.convert_to_tensor( - x_valid_trk, dtype=tf.float64 - ) - val_data_dict[f"Y_valid_{val_file_identifier}"] = tf.convert_to_tensor( - y_valid, dtype=tf.int64 - ) - if nCond is not None: - val_data_dict[ - f"X_valid_addvars_{val_file_identifier}" - ] = tf.convert_to_tensor(x_valid.iloc[:, :nCond], dtype=tf.float64) - - else: - val_data_dict[f"X_valid_{val_file_identifier}"] = x_valid - val_data_dict[f"X_valid_trk_{val_file_identifier}"] = x_valid_trk - val_data_dict[f"Y_valid_{val_file_identifier}"] = y_valid - if nCond is not None: - val_data_dict[f"X_valid_addvars_{val_file_identifier}"] = x_valid.iloc[ - :, :nCond - ] - - # Return the val data dict - return val_data_dict - - -def load_validation_data_dl1( - train_config: object, - preprocess_config: object, - n_jets: int, - convert_to_tensor: bool = False, -) -> dict: - """ - Load the validation data for DL1. - - Parameters - ---------- - train_config : object - Loaded train_config object. - preprocess_config : object - Loaded preprocess_config object. - n_jets : int - Number of jets to load. - convert_to_tensor : bool - Decide, if the validation data are converted to - tensorflow tensors to avoid memory leaks. - - Returns - ------- - val_data_dict : dict - Dict with the validation data. 
- """ - - # Define nn_structure and the Eval params - nn_structure = train_config.nn_structure - val_data_dict = {} - val_files = train_config.validation_files - - # Ensure the n_jets is an int - n_jets = int(n_jets) - - # Check for excluded variables - exclude = None - if "exclude" in train_config.config: - exclude = train_config.config["exclude"] - - # loop over validation files and load X_valid, Y_valid for each file - for val_file_identifier, val_file_config in val_files.items(): - logger.info(f"Loading validation file {val_file_identifier}") - - cut_vars_dict = ( - val_file_config["variable_cuts"] - if "variable_cuts" in val_file_config - else None - ) - - (X_valid, Y_valid,) = get_test_sample( - input_file=val_file_config["path"], - var_dict=train_config.var_dict, - preprocess_config=preprocess_config, - class_labels=nn_structure["class_labels"], - n_jets=n_jets, - exclude=exclude, - cut_vars_dict=cut_vars_dict, - ) - - if convert_to_tensor: - # Transform to tf.tensors and add to val_dict - val_data_dict[f"X_valid_{val_file_identifier}"] = tf.convert_to_tensor( - X_valid, dtype=tf.float64 - ) - val_data_dict[f"Y_valid_{val_file_identifier}"] = tf.convert_to_tensor( - Y_valid, dtype=tf.int64 - ) - - else: - val_data_dict[f"X_valid_{val_file_identifier}"] = X_valid - val_data_dict[f"Y_valid_{val_file_identifier}"] = Y_valid - - # Return the val data dict - return val_data_dict - - -def load_validation_data_dips( - train_config: object, - preprocess_config: object, - n_jets: int, - convert_to_tensor: bool = False, -) -> dict: - """ - Load the validation data for DIPS. - - Parameters - ---------- - train_config : object - Loaded train_config object. - preprocess_config : object - Loaded preprocess_config object. - n_jets : int - Number of jets to load. - convert_to_tensor : bool - Decide, if the validation data are converted to - tensorflow tensors to avoid memory leaks. - - Returns - ------- - val_data_dict : dict - Dict with the validation data. 
- """ - - # Define nn_structure and the Eval params - nn_structure = train_config.nn_structure - val_data_dict = {} - val_files = train_config.validation_files - - # Set the tracks collection name - tracks_name = train_config.tracks_name - logger.debug(f"Using tracks_name value '{tracks_name}' for validation") - - # loop over validation files and load X_valid, Y_valid for each file - for val_file_identifier, val_file_config in val_files.items(): - logger.info(f"Loading validation file {val_file_identifier}") - - cut_vars_dict = ( - val_file_config["variable_cuts"] - if "variable_cuts" in val_file_config - else None - ) - - (X_valid, Y_valid,) = get_test_sample_trks( - input_file=val_file_config["path"], - var_dict=train_config.var_dict, - preprocess_config=preprocess_config, - class_labels=nn_structure["class_labels"], - tracks_name=tracks_name, - n_jets=n_jets, - cut_vars_dict=cut_vars_dict, - ) - - if convert_to_tensor: - # Transform to tf.tensors and add to val_dict - val_data_dict[f"X_valid_{val_file_identifier}"] = tf.convert_to_tensor( - X_valid, dtype=tf.float64 - ) - val_data_dict[f"Y_valid_{val_file_identifier}"] = tf.convert_to_tensor( - Y_valid, dtype=tf.int64 - ) - - else: - val_data_dict[f"X_valid_{val_file_identifier}"] = X_valid - val_data_dict[f"Y_valid_{val_file_identifier}"] = Y_valid - - # Return the val data dict - return val_data_dict - - -def GetTestFile( - input_file: str, - var_dict: str, - preprocess_config: object, - class_labels: list, - tracks_name: str, - n_jets: int, - exclude: list = None, - cut_vars_dict: dict = None, - jet_variables: list = None, - print_logger: bool = True, -): - """ - Load the jet and track variables and labels. Scale the jet - and track variables for validation use in the NN's. - - Parameters - ---------- - input_file : str - Path to the file which is to be loaded. - var_dict : str - Variable dict with the wanted jet variables inside. - preprocess_config : object - Loaded preprocessing config that was used. - class_labels : list - List of classes used for training of the model. - tracks_name : str - Name of the tracks collection to use. - n_jets : int - Number of jets that should be loaded. - exclude : list - List of variables that are not loaded. - cut_vars_dict : dict - Dict with the cuts that should be applied. - jet_variables : list - List of variables that are used. - print_logger : bool - Decide, if the logger info is printed or not. - - Returns - ------- - X : numpy.ndarray - X values of the jets ready to be used in the NN's. - X_trk : numpy.ndarray - X values of the tracks ready to be used in the NN's. - Y : numpy.ndarray - Y values ready to be used in the NN's. - """ - - X_trk, Y_trk = get_test_sample_trks( - input_file=input_file, - var_dict=var_dict, - preprocess_config=preprocess_config, - class_labels=class_labels, - tracks_name=tracks_name, - n_jets=int(n_jets), - cut_vars_dict=cut_vars_dict, - print_logger=False, - ) - - X, Y = get_test_sample( - input_file=input_file, - var_dict=var_dict, - preprocess_config=preprocess_config, - class_labels=class_labels, - n_jets=int(n_jets), - exclude=exclude, - cut_vars_dict=cut_vars_dict, - jet_variables=jet_variables, - print_logger=print_logger, - ) - - assert np.equal(Y, Y_trk).all() - - return X, X_trk, Y - - -def evaluate_model_umami( - model: object, - data_dict: dict, - class_labels: list, - main_class: str, - frac_dict: dict, - target_beff: float = 0.77, -) -> dict: - """ - Evaluate the UMAMI model on the data provided. 
- - Parameters - ---------- - model : object - Loaded UMAMI model for evaluation. - data_dict : dict - Dict with the loaded data which are to be evaluated. - class_labels : list - List of classes used for training of the model. - main_class : str - Main class which is to be tagged. - target_beff : float - Working Point which is to be used for evaluation. - frac_dict : dict - Dict with the fractions of the non-main classes. - Sum needs to be one! - - Returns - ------- - result_dict : dict - Dict with validation metrics/rejections. - """ - - validation_file_identifiers = get_unique_identifiers( - keys=list(data_dict.keys()), prefix="Y_valid" - ) - - if len(validation_file_identifiers) == 0: - logger.warning("Didn't find any validation file identifiers.") - - result_dict = {} - - # loop over validation files and load X_valid, X_valid_trk, Y_valid for each file - for val_file_identifier in validation_file_identifiers: - # Check which input data need to be used - # Calculate accuracy andloss of UMAMI and Dips part - if f"X_valid_addvars_{val_file_identifier}" in data_dict: - x = [ - data_dict[f"X_valid_trk_{val_file_identifier}"], - data_dict[f"X_valid_addvars_{val_file_identifier}"], - data_dict[f"X_valid_{val_file_identifier}"], - ] - else: - x = [ - data_dict[f"X_valid_trk_{val_file_identifier}"], - data_dict[f"X_valid_{val_file_identifier}"], - ] - (loss, dips_loss, umami_loss, dips_accuracy, umami_accuracy,) = model.evaluate( - x, - data_dict[f"Y_valid_{val_file_identifier}"], - batch_size=15_000, - use_multiprocessing=True, - workers=8, - verbose=0, - ) - - # Evaluate with the model for predictions - y_pred_dips, y_pred_umami = model.predict( - x, - batch_size=15_000, - use_multiprocessing=True, - workers=8, - verbose=0, - ) - - # Get rejections for DIPS and UMAMI - rej_dict_dips, disc_cut_dips = umt.GetRejection( - y_pred=y_pred_dips, - y_true=data_dict[f"Y_valid_{val_file_identifier}"], - class_labels=class_labels, - main_class=main_class, - frac_dict=frac_dict["dips"], - target_eff=target_beff, - unique_identifier=val_file_identifier, - subtagger="dips", - ) - rej_dict_umami, disc_cut_umami = umt.GetRejection( - y_pred=y_pred_umami, - y_true=data_dict[f"Y_valid_{val_file_identifier}"], - class_labels=class_labels, - main_class=main_class, - frac_dict=frac_dict["umami"], - target_eff=target_beff, - unique_identifier=val_file_identifier, - subtagger="umami", - ) - - # Write metrics to results dict - # TODO Change this in python 3.9 - result_dict.update( - { - f"val_loss_{val_file_identifier}": loss, - f"val_loss_dips_{val_file_identifier}": dips_loss, - f"val_loss_umami_{val_file_identifier}": umami_loss, - f"val_acc_dips_{val_file_identifier}": dips_accuracy, - f"val_acc_umami_{val_file_identifier}": umami_accuracy, - f"disc_cut_dips_{val_file_identifier}": disc_cut_dips, - f"disc_cut_umami_{val_file_identifier}": disc_cut_umami, - } - ) - - # Write rejections to the results dict - # TODO Change this in python 3.9 - result_dict.update(rej_dict_umami) - result_dict.update(rej_dict_dips) - - return result_dict - - -def evaluate_model( - model: object, - data_dict: dict, - class_labels: list, - main_class: str, - target_beff: float = 0.77, - frac_dict: dict = None, -) -> dict: - """ - Evaluate the DIPS/DL1 model on the data provided. - - Parameters - ---------- - model : object - Loaded UMAMI model for evaluation. - data_dict : dict - Dict with the loaded data which are to be evaluated. - class_labels : list - List of classes used for training of the model. 
- main_class : str - Main class which is to be tagged. - target_beff : float - Working Point which is to be used for evaluation. - frac_dict : dict - Dict with the fractions of the non-main classes. - Sum needs to be one! - - Returns - ------- - result_dict : dict - Dict with validation metrics/rejections. - """ - - validation_file_identifiers = get_unique_identifiers( - keys=list(data_dict.keys()), prefix="Y_valid" - ) - - if len(validation_file_identifiers) == 0: - logger.warning("Didn't find any validation file identifiers.") - - result_dict = {} - # loop over validation files and load X_valid, Y_valid for each file - for val_file_identifier in validation_file_identifiers: - # Check which input data need to be used - if ( - f"X_valid_trk_{val_file_identifier}" in data_dict - and f"X_valid_{val_file_identifier}" in data_dict - ): - x = [ - data_dict[f"X_valid_trk_{val_file_identifier}"], - data_dict[f"X_valid_{val_file_identifier}"], - ] - - elif ( - f"X_valid_trk_{val_file_identifier}" in data_dict - and f"X_valid_{val_file_identifier}" not in data_dict - ): - x = data_dict[f"X_valid_trk_{val_file_identifier}"] - - else: - x = data_dict[f"X_valid_{val_file_identifier}"] - - loss, accuracy = model.evaluate( - x=x, - y=data_dict[f"Y_valid_{val_file_identifier}"], - batch_size=15_000, - use_multiprocessing=True, - workers=8, - verbose=0, - ) - - y_pred_dips = model.predict( - x=x, - batch_size=15_000, - use_multiprocessing=True, - workers=8, - verbose=0, - ) - - rej_dict, disc_cut = umt.GetRejection( - y_pred=y_pred_dips, - y_true=data_dict[f"Y_valid_{val_file_identifier}"], - unique_identifier=val_file_identifier, - class_labels=class_labels, - main_class=main_class, - frac_dict=frac_dict, - target_eff=target_beff, - ) - - # Adding the results to result_dict - result_dict.update( - { - f"val_loss_{val_file_identifier}": loss, - f"val_acc_{val_file_identifier}": accuracy, - f"disc_cut_{val_file_identifier}": disc_cut, - } - ) - - # Write the rejection values to the results dict - # TODO Change this in python 3.9 - result_dict.update( - {f"{key}": rej_dict[key] for key in rej_dict} # pylint: disable=C0206 - ) - - # Return finished dict - return result_dict - - -def calc_validation_metrics( - train_config: object, - preprocess_config: object, - tagger: str, - target_beff: float = 0.77, - n_jets: int = int(3e5), - model_string: str = "model_epoch", -) -> str: - """ - Calculates the validation metrics and rejections for each epoch - and dump it into a json. - - Parameters - ---------- - train_config : object - The loaded train config object. - preprocess_config : object - The loaded preprocess config object. - tagger : str - Name of the tagger that is used to calcualte metrics. - target_beff : float - Working point that is to be used. - n_jets : int - Number of jets to use for calculation. - model_string : str - Name of the model files. - - Returns - ------- - output_file_path - Path to the validation dict where the results are saved in. - - Raises - ------ - ValueError - If "tagger" is not dips, dl1, umami or cads. 
- """ - - # Get evaluation parameters and NN structure from train config - Eval_parameters = train_config.Eval_parameters_validation - nn_structure = train_config.nn_structure - Second_model_string = ( - "dips_model_" if model_string == "model_epoch" else "model_epoch" - ) - - # Make a list with the model epochs saves - training_output = [ - os.path.join(f"{train_config.model_name}/model_files/", f) - for f in os.listdir(f"{train_config.model_name}/model_files/") - if model_string in f - ] - - if len(training_output) == 0: - logger.warning( - f"{model_string} models used but not found! Using {Second_model_string}" - ) - - # Set new model string - model_string = Second_model_string - - # Make a list with the model epochs saves with second model name string - training_output = [ - os.path.join(f"{train_config.model_name}/model_files/", f) - for f in os.listdir(f"{train_config.model_name}/model_files/") - if model_string in f - ] - - # Open the json file and load the training out - try: - with open( - get_validation_dict_name( - working_point=Eval_parameters["working_point"], - n_jets=Eval_parameters["n_jets"], - dir_name=train_config.model_name, - ), - "r", - ) as training_out_json: - training_output_list = json.load(training_out_json) - - except FileNotFoundError: - logger.info("No callback json file with validation metrics found! Make new one") - training_output_list = [ - {"epoch": n} for n in range(train_config.nn_structure["epochs"]) - ] - - # Init a results list - results = [] - - # TODO Change in Python 3.10 - # Check tagger and load the correct val data - if tagger.casefold() == "umami": - data_dict = load_validation_data_umami( - train_config=train_config, - preprocess_config=preprocess_config, - n_jets=n_jets, - convert_to_tensor=False, - ) - - elif tagger.casefold() == "dl1": - data_dict = load_validation_data_dl1( - train_config=train_config, - preprocess_config=preprocess_config, - n_jets=n_jets, - convert_to_tensor=False, - ) - - elif tagger.casefold() == "dips": - data_dict = load_validation_data_dips( - train_config=train_config, - preprocess_config=preprocess_config, - n_jets=n_jets, - convert_to_tensor=False, - ) - - elif tagger.casefold() == "cads": - data_dict = load_validation_data_umami( - train_config=train_config, - preprocess_config=preprocess_config, - n_jets=n_jets, - jets_var_list=[ - global_config.etavariable, - global_config.pTvariable, - ], - convert_to_tensor=False, - ) - - else: - raise ValueError(f"Tagger {tagger} is not supported!") - - # Loop over the different model savepoints at each epoch - for n, model_file in enumerate(sorted(training_output, key=natural_keys)): - logger.info(f"Working on {n+1}/{len(training_output)} input files") - - # Init results dict to save to - result_dict = {} - - # Get the epoch number from the .h5 file - try: - epoch = int( - model_file[ - model_file.rfind(f"{model_string}") - + len(f"{model_string}") : model_file.find(".h5") - ] - ) - - except ValueError as val_error: - raise ValueError( - f"Epoch could not be extracted from {model_string}!" 
- ) from val_error - - # Load the epoch from json and add it to dict - for train_epoch in training_output_list: - if epoch == train_epoch["epoch"]: - result_dict = train_epoch - - # Ensure the epoch is in the dict - result_dict["epoch"] = epoch - - if tagger.casefold() == "umami": - # Load UMAMI model - umami = load_model(model_file, {"Sum": utf.Sum}) - - # Evaluate Umami model - val_result_dict = evaluate_model_umami( - model=umami, - data_dict=data_dict, - class_labels=nn_structure["class_labels"], - main_class=nn_structure["main_class"], - target_beff=target_beff, - frac_dict=Eval_parameters["frac_values"], - ) - - # Delete model - del umami - - elif tagger.casefold() == "dl1": - # Load DL1 model - dl1 = load_model(model_file) - - # Evaluate DL1 model - val_result_dict = evaluate_model( - model=dl1, - data_dict=data_dict, - class_labels=nn_structure["class_labels"], - main_class=nn_structure["main_class"], - target_beff=target_beff, - frac_dict=Eval_parameters["frac_values"], - ) - - # Delete model - del dl1 - - elif tagger.casefold() == "dips": - # Load DIPS model - with CustomObjectScope({"Sum": utf.Sum}): - dips = load_model(model_file) - - # Validate dips - val_result_dict = evaluate_model( - model=dips, - data_dict=data_dict, - class_labels=nn_structure["class_labels"], - main_class=nn_structure["main_class"], - target_beff=target_beff, - frac_dict=Eval_parameters["frac_values"], - ) - - # Delete model - del dips - - elif tagger.casefold() == "cads": - # Load DIPS Conditional Attention model - with CustomObjectScope( - { - "Sum": utf.Sum, - "Attention": utf.Attention, - "DeepSet": utf.DeepSet, - "AttentionPooling": utf.AttentionPooling, - "DenseNet": utf.DenseNet, - "ConditionalAttention": utf.ConditionalAttention, - "ConditionalDeepSet": utf.ConditionalDeepSet, - } - ): - cads = load_model(model_file) - - # Validate dips - val_result_dict = evaluate_model( - model=cads, - data_dict=data_dict, - class_labels=nn_structure["class_labels"], - main_class=nn_structure["main_class"], - target_beff=target_beff, - frac_dict=Eval_parameters["frac_values"], - ) - - # Delete model - del cads - - else: - raise ValueError(f"Tagger {tagger} is not supported!") - - # Save results in dict - for k, v in val_result_dict.items(): - result_dict[k] = v - - # Append results dict to list - results.append(result_dict) - - # Sort the results after epoch - results = sorted(results, key=lambda x: x["epoch"]) - - # Get validation dict name - output_file_path = get_validation_dict_name( - target_beff, n_jets, train_config.model_name - ) - - # Dump dict into json - with open(output_file_path, "w") as outfile: - json.dump(results, outfile, indent=4) - - # Return Validation dict name - return output_file_path -- GitLab From 2f74e693812ed8151b8cbd91e36b8c94e8982cfe Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 15:12:43 +0200 Subject: [PATCH 18/28] adding test_examples to coverage report --- .gitlab/workflow/.coverage-gitlab-ci.yaml | 1 + .gitlab/workflow/.integration_test-gitlab-ci.yaml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitlab/workflow/.coverage-gitlab-ci.yaml b/.gitlab/workflow/.coverage-gitlab-ci.yaml index 0a70cf65..5f717cc6 100644 --- a/.gitlab/workflow/.coverage-gitlab-ci.yaml +++ b/.gitlab/workflow/.coverage-gitlab-ci.yaml @@ -8,6 +8,7 @@ - test_plotting_umami_dl1 - test_plotting_umami_umami - unittest_parallel + - test_examples test_coverage: stage: coverage_test_stage diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml 
b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index ef1066d3..a4b00984 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -450,7 +450,8 @@ test_examples: <<: *test_template stage: integration_test_plotting script: - - pytest -v umami/tests/integration/test_examples.py + - pytest --cov=./ --cov-report= ./umami/tests/integration/test_examples.py -v -s --junitxml=report.xml + - cp .coverage ./coverage_files/.coverage.test_examples artifacts: paths: - docs/ci_assets -- GitLab From fee22aa4b15bd0ff56703c68580cea3e647051bb Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 15:32:22 +0200 Subject: [PATCH 19/28] fixing file coverage artifacts --- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index a4b00984..bc4a6ec8 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -455,4 +455,5 @@ test_examples: artifacts: paths: - docs/ci_assets + - coverage_files/ expire_in: 1 day -- GitLab From 6831cc28e83e61ba4727f606599cb39945f03187 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Tue, 19 Apr 2022 17:27:17 +0200 Subject: [PATCH 20/28] removing dependencies from test_plot_input_vars CI job --- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index bc4a6ec8..316c5dc1 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -380,8 +380,7 @@ test_evaluate_tagger_in_files: test_plot_input_vars: <<: *test_template stage: integration_test_plotting - needs: *dependencies_from_train_stage - dependencies: *dependencies_from_train_stage + dependencies: [] script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_input_vars_plot.py -v -s --junitxml=report.xml - cp .coverage ./coverage_files/.coverage.test_input_vars_plot -- GitLab From f7a3ffd8c9a4488c781edf8e0779ba2c1d40122a Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 09:54:15 +0200 Subject: [PATCH 21/28] creating coverage folder --- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index 316c5dc1..de2eae9f 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -383,6 +383,7 @@ test_plot_input_vars: dependencies: [] script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_input_vars_plot.py -v -s --junitxml=report.xml + - mkdir -p ./coverage_files - cp .coverage ./coverage_files/.coverage.test_input_vars_plot artifacts: <<: *artifact_template -- GitLab From ef90c7831c538182874b89f75dd4ef5e53b82b00 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 10:21:29 +0200 Subject: [PATCH 22/28] small ci logic fix --- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index de2eae9f..5bdcb33b 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ 
b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -380,10 +380,10 @@ test_evaluate_tagger_in_files: test_plot_input_vars: <<: *test_template stage: integration_test_plotting - dependencies: [] + dependencies: linter + needs: linter script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_input_vars_plot.py -v -s --junitxml=report.xml - - mkdir -p ./coverage_files - cp .coverage ./coverage_files/.coverage.test_input_vars_plot artifacts: <<: *artifact_template -- GitLab From 4268549a901b4fef7837eb12996fcc4d10af9e65 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 10:25:43 +0200 Subject: [PATCH 23/28] syntax fix --- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index 5bdcb33b..7d9c6d6e 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -380,8 +380,10 @@ test_evaluate_tagger_in_files: test_plot_input_vars: <<: *test_template stage: integration_test_plotting - dependencies: linter - needs: linter + dependencies: + - linter + needs: + - linter script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_input_vars_plot.py -v -s --junitxml=report.xml - cp .coverage ./coverage_files/.coverage.test_input_vars_plot -- GitLab From 268f19e3fb5719374860742ed379db2b572d556e Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 10:50:20 +0200 Subject: [PATCH 24/28] fixing ci dependencies --- .gitlab/workflow/.coverage-gitlab-ci.yaml | 15 ++++++++++++++- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 14 -------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.gitlab/workflow/.coverage-gitlab-ci.yaml b/.gitlab/workflow/.coverage-gitlab-ci.yaml index 5f717cc6..42465a0d 100644 --- a/.gitlab/workflow/.coverage-gitlab-ci.yaml +++ b/.gitlab/workflow/.coverage-gitlab-ci.yaml @@ -3,12 +3,25 @@ # ---------------------------------------------------------------------------- .coverage_dependencies: &coverage_dependencies + - test_evaluate_tagger_in_files + - test_examples - test_plot_input_vars - test_plotting_umami_dips - test_plotting_umami_dl1 - test_plotting_umami_umami + - test_train_cads + - test_train_cond_att_umami + - test_train_dips + - test_train_dips_four_classes + - test_train_dl1r + - test_train_tfrecords_cads + - test_train_tfrecords_cond_att_umami + - test_train_tfrecords_dips + - test_train_tfrecords_umami + - test_train_umami - unittest_parallel - - test_examples + + test_coverage: stage: coverage_test_stage diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index 7d9c6d6e..be10cf37 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -25,20 +25,6 @@ junit: report.xml -.dependencies_from_train_stage: &dependencies_from_train_stage - - test_train_dips - - test_train_dips_four_classes - - test_train_tfrecords_dips - - test_train_cads - - test_train_tfrecords_cads - - test_train_dl1r - - test_train_umami - - test_train_tfrecords_umami - - test_train_cond_att_umami - - test_train_tfrecords_cond_att_umami - - test_evaluate_tagger_in_files - - test_preprocessing_dips_count: <<: *test_template stage: integration_test_preprocessing -- GitLab From acb525f6e9c9680765b45d3f69bbbce3ec8605d4 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 
2022 11:31:18 +0200 Subject: [PATCH 25/28] summarising integration tests --- .../workflow/.integration_test-gitlab-ci.yaml | 319 +++++++----------- 1 file changed, 120 insertions(+), 199 deletions(-) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index be10cf37..1a44d819 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -25,147 +25,63 @@ junit: report.xml -test_preprocessing_dips_count: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dips_count" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dips_count - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dips/ - - coverage_files/ - -test_preprocessing_dl1r_count: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dl1r_count" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dl1r_count - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dl1r/ - - coverage_files/ - -test_preprocessing_umami_count: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_umami_count" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_umami_count - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_umami/ - - coverage_files/ - -test_preprocessing_dips_pdf: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dips_pdf" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dips_pdf - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dips_pdf/ - - coverage_files/ - -test_preprocessing_dips_four_classes_pdf: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dips_four_classes_pdf" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dips_four_classes_pdf - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dips/ - - coverage_files/ - -test_preprocessing_dl1r_pdf: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dl1r_pdf" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dl1r_pdf - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dl1r_pdf/ - - coverage_files/ - -test_preprocessing_umami_pdf: +.test_preprocessing: &test_preprocessing <<: *test_template stage: integration_test_preprocessing + before_script: + - |- + if [[ $TEST_PATH == "" ]]; then + TEST_PATH=$TEST_NAME + echo $TEST_PATH + fi script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_umami_pdf" -v -s --junitxml=report.xml - - cp .coverage 
./coverage_files/.coverage.test_preprocessing_umami_pdf + - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "$TEST_NAME" -v -s --junitxml=report.xml + - cp .coverage ./coverage_files/.coverage.$TEST_NAME artifacts: <<: *artifact_template paths: - plots/ - - test_preprocessing_umami_pdf/ + - $TEST_PATH/ - coverage_files/ -test_preprocessing_dips_weighting: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dips_weighting" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dips_weighting - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dips_weighting/ - - coverage_files/ +test_preprocessing_parallel: + <<: *test_preprocessing + parallel: + matrix: + - TEST_PATH: test_preprocessing_dips + - TEST_NAME: + - test_preprocessing_dips_four_classes_pdf + - test_preprocessing_dips_count + - TEST_NAME: + - test_preprocessing_dips_pdf + - test_preprocessing_dips_weighting + - test_preprocessing_dl1r_pdf + - test_preprocessing_dl1r_weighting + - test_preprocessing_umami_pdf + - test_preprocessing_umami_weighting + - TEST_NAME: test_preprocessing_dl1r_count + TEST_PATH: test_preprocessing_dl1r + - TEST_NAME: test_preprocessing_umami_count + TEST_PATH: test_preprocessing_umami -test_preprocessing_dl1r_weighting: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_dl1r_weighting" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_dl1r_weighting - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_dl1r_weighting/ - - coverage_files/ - -test_preprocessing_umami_weighting: - <<: *test_template - stage: integration_test_preprocessing - script: - - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "test_preprocessing_umami_weighting" -v -s --junitxml=report.xml - - cp .coverage ./coverage_files/.coverage.test_preprocessing_umami_weighting - artifacts: - <<: *artifact_template - paths: - - plots/ - - test_preprocessing_umami_weighting/ - - coverage_files/ +.test_train_dependencies: &test_train_dependencies + dependencies: + - test_preprocessing_parallel + needs: + - test_preprocessing_parallel test_train_dips: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_dips_count - - test_preprocessing_dips_pdf - - test_preprocessing_dips_weighting - dependencies: - - test_preprocessing_dips_count - - test_preprocessing_dips_pdf - - test_preprocessing_dips_weighting + # needs: + # - test_preprocessing_dips_count + # - test_preprocessing_dips_pdf + # - test_preprocessing_dips_weighting + # dependencies: + # - test_preprocessing_dips_count + # - test_preprocessing_dips_pdf + # - test_preprocessing_dips_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_dips_no_attention" - cp .coverage ./coverage_files/.coverage.test_train_dips @@ -178,10 +94,11 @@ test_train_dips: test_train_dips_four_classes: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_dips_four_classes_pdf - dependencies: - - test_preprocessing_dips_four_classes_pdf + # needs: + # - test_preprocessing_dips_four_classes_pdf + # dependencies: + # - 
test_preprocessing_dips_four_classes_pdf + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_dips_four_classes" - cp .coverage ./coverage_files/.coverage.test_train_dips_four_classes @@ -194,14 +111,15 @@ test_train_dips_four_classes: test_train_tfrecords_dips: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_dips_count - - test_preprocessing_dips_pdf - - test_preprocessing_dips_weighting - dependencies: - - test_preprocessing_dips_count - - test_preprocessing_dips_pdf - - test_preprocessing_dips_weighting + # needs: + # - test_preprocessing_dips_count + # - test_preprocessing_dips_pdf + # - test_preprocessing_dips_weighting + # dependencies: + # - test_preprocessing_dips_count + # - test_preprocessing_dips_pdf + # - test_preprocessing_dips_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_tfrecords_dips" - cp .coverage ./coverage_files/.coverage.test_train_tfrecords_dips @@ -214,14 +132,15 @@ test_train_tfrecords_dips: test_train_cads: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting - dependencies: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting + # needs: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + # dependencies: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_cads" - cp .coverage ./coverage_files/.coverage.test_train_cads @@ -234,14 +153,15 @@ test_train_cads: test_train_tfrecords_cads: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting - dependencies: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting + # needs: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + # dependencies: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_tfrecords_cads" - cp .coverage ./coverage_files/.coverage.test_train_tfrecords_cads @@ -254,14 +174,15 @@ test_train_tfrecords_cads: test_train_dl1r: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_dl1r_count - - test_preprocessing_dl1r_pdf - - test_preprocessing_dl1r_weighting - dependencies: - - test_preprocessing_dl1r_count - - test_preprocessing_dl1r_pdf - - test_preprocessing_dl1r_weighting + # needs: + # - test_preprocessing_dl1r_count + # - test_preprocessing_dl1r_pdf + # - test_preprocessing_dl1r_weighting + # dependencies: + # - test_preprocessing_dl1r_count + # - test_preprocessing_dl1r_pdf + # - test_preprocessing_dl1r_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k 
"test_train_dl1r" - cp .coverage ./coverage_files/.coverage.test_train_dl1r @@ -274,14 +195,15 @@ test_train_dl1r: test_train_umami: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting - dependencies: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting + # needs: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + # dependencies: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_umami" - cp .coverage ./coverage_files/.coverage.test_train_umami @@ -294,14 +216,15 @@ test_train_umami: test_train_tfrecords_umami: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting - dependencies: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting + # needs: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + # dependencies: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_tfrecords_umami" - cp .coverage ./coverage_files/.coverage.test_train_tfrecords_umami @@ -314,14 +237,15 @@ test_train_tfrecords_umami: test_train_cond_att_umami: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting - dependencies: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting + # needs: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + # dependencies: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_cond_att_umami" - cp .coverage ./coverage_files/.coverage.test_train_cond_att_umami @@ -334,14 +258,15 @@ test_train_cond_att_umami: test_train_tfrecords_cond_att_umami: <<: *test_template stage: integration_test_tagger - needs: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting - dependencies: - - test_preprocessing_umami_count - - test_preprocessing_umami_pdf - - test_preprocessing_umami_weighting + # needs: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + # dependencies: + # - test_preprocessing_umami_count + # - test_preprocessing_umami_pdf + # - test_preprocessing_umami_weighting + <<: *test_train_dependencies script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_tfrecords_cond_att_umami" - cp .coverage ./coverage_files/.coverage.test_train_tfrecords_cond_att_umami @@ -366,10 +291,6 @@ test_evaluate_tagger_in_files: 
test_plot_input_vars: <<: *test_template stage: integration_test_plotting - dependencies: - - linter - needs: - - linter script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_input_vars_plot.py -v -s --junitxml=report.xml - cp .coverage ./coverage_files/.coverage.test_input_vars_plot -- GitLab From 26c5c76bf0e2d91b2f8609a9a9c9c2dd0a59b109 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 11:48:45 +0200 Subject: [PATCH 26/28] ci simplification --- .../workflow/.integration_test-gitlab-ci.yaml | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index 1a44d819..e401ab68 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -28,12 +28,6 @@ .test_preprocessing: &test_preprocessing <<: *test_template stage: integration_test_preprocessing - before_script: - - |- - if [[ $TEST_PATH == "" ]]; then - TEST_PATH=$TEST_NAME - echo $TEST_PATH - fi script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "$TEST_NAME" -v -s --junitxml=report.xml - cp .coverage ./coverage_files/.coverage.$TEST_NAME @@ -41,28 +35,24 @@ <<: *artifact_template paths: - plots/ - - $TEST_PATH/ + - test_preprocessing_*/ - coverage_files/ test_preprocessing_parallel: <<: *test_preprocessing parallel: matrix: - - TEST_PATH: test_preprocessing_dips - TEST_NAME: - - test_preprocessing_dips_four_classes_pdf - test_preprocessing_dips_count - - TEST_NAME: + - test_preprocessing_dips_four_classes_pdf - test_preprocessing_dips_pdf - test_preprocessing_dips_weighting + - test_preprocessing_dl1r_count - test_preprocessing_dl1r_pdf - test_preprocessing_dl1r_weighting + - test_preprocessing_umami_count - test_preprocessing_umami_pdf - test_preprocessing_umami_weighting - - TEST_NAME: test_preprocessing_dl1r_count - TEST_PATH: test_preprocessing_dl1r - - TEST_NAME: test_preprocessing_umami_count - TEST_PATH: test_preprocessing_umami .test_train_dependencies: &test_train_dependencies dependencies: -- GitLab From 593a4264dbf25d9ce22c8bf1bfc4e26073735265 Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 13:00:03 +0200 Subject: [PATCH 27/28] ci fixes --- .../workflow/.integration_test-gitlab-ci.yaml | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index e401ab68..81ef29fd 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -28,6 +28,12 @@ .test_preprocessing: &test_preprocessing <<: *test_template stage: integration_test_preprocessing + before_script: + - |- + if [[ $TEST_PATH == "" ]]; then + TEST_PATH=$TEST_NAME + echo $TEST_PATH + fi script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "$TEST_NAME" -v -s --junitxml=report.xml - cp .coverage ./coverage_files/.coverage.$TEST_NAME @@ -35,24 +41,32 @@ <<: *artifact_template paths: - plots/ - - test_preprocessing_*/ + - $TEST_PATH/ - coverage_files/ test_preprocessing_parallel: <<: *test_preprocessing parallel: matrix: + - TEST_PATH: test_preprocessing_dips + TEST_NAME: + - test_preprocessing_dips_four_classes_pdf + - test_preprocessing_dips_count - TEST_NAME: - test_preprocessing_dips_count - test_preprocessing_dips_four_classes_pdf - test_preprocessing_dips_pdf - 
test_preprocessing_dips_weighting - - test_preprocessing_dl1r_count - test_preprocessing_dl1r_pdf - test_preprocessing_dl1r_weighting - - test_preprocessing_umami_count - test_preprocessing_umami_pdf - test_preprocessing_umami_weighting + - TEST_NAME: test_preprocessing_dl1r_count + TEST_PATH: test_preprocessing_dl1r + - TEST_NAME: test_preprocessing_umami_count + TEST_PATH: test_preprocessing_umami + + .test_train_dependencies: &test_train_dependencies dependencies: @@ -60,6 +74,29 @@ test_preprocessing_parallel: needs: - test_preprocessing_parallel + +.test_train: &test_train + <<: *test_template + stage: integration_test_tagger + # needs: + # - test_preprocessing_dips_count + # - test_preprocessing_dips_pdf + # - test_preprocessing_dips_weighting + # dependencies: + # - test_preprocessing_dips_count + # - test_preprocessing_dips_pdf + # - test_preprocessing_dips_weighting + <<: *test_train_dependencies + script: + - pytest --cov=./ --cov-report= ./umami/tests/integration/test_train.py -v -s --junitxml=report.xml -k "test_train_dips_no_attention" + - cp .coverage ./coverage_files/.coverage.test_train_dips + artifacts: + <<: *artifact_template + paths: + - test_*/ + - coverage_files/ + + test_train_dips: <<: *test_template stage: integration_test_tagger -- GitLab From ebdc76c905202d6839387296e3f38bcc99e25ace Mon Sep 17 00:00:00 2001 From: Manuel Guth Date: Wed, 20 Apr 2022 13:34:02 +0200 Subject: [PATCH 28/28] fixing before script --- .gitlab/workflow/.integration_test-gitlab-ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/workflow/.integration_test-gitlab-ci.yaml b/.gitlab/workflow/.integration_test-gitlab-ci.yaml index 81ef29fd..9879cb1e 100644 --- a/.gitlab/workflow/.integration_test-gitlab-ci.yaml +++ b/.gitlab/workflow/.integration_test-gitlab-ci.yaml @@ -34,6 +34,7 @@ TEST_PATH=$TEST_NAME echo $TEST_PATH fi + - . run_setup.sh script: - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "$TEST_NAME" -v -s --junitxml=report.xml - cp .coverage ./coverage_files/.coverage.$TEST_NAME -- GitLab
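Taken together, patches 25 to 28 replace the individual preprocessing jobs with one hidden template plus a single `parallel: matrix` job, and every training job then depends only on that one job via a shared anchor. The snippet below is an abridged sketch of the resulting configuration, reconstructed from the diffs above rather than copied verbatim from the final file; the `.test_template` and `.artifact_template` anchors are assumed to be defined earlier in `.integration_test-gitlab-ci.yaml`, and the matrix is shortened to two representative entries (patch 27 carries the full list).

.test_preprocessing: &test_preprocessing
  <<: *test_template
  stage: integration_test_preprocessing
  before_script:
    # fall back to the test name as artifact directory if the matrix entry sets no TEST_PATH
    - |-
      if [[ $TEST_PATH == "" ]]; then
        TEST_PATH=$TEST_NAME
        echo $TEST_PATH
      fi
    - . run_setup.sh
  script:
    - pytest --cov=./ --cov-report= ./umami/tests/integration/test_preprocessing.py -k "$TEST_NAME" -v -s --junitxml=report.xml
    - cp .coverage ./coverage_files/.coverage.$TEST_NAME
  artifacts:
    <<: *artifact_template
    paths:
      - plots/
      - $TEST_PATH/
      - coverage_files/

test_preprocessing_parallel:
  <<: *test_preprocessing
  parallel:
    matrix:
      # GitLab spawns one child job per TEST_NAME value; TEST_PATH is only set where the
      # output directory differs from the test name (abridged)
      - TEST_PATH: test_preprocessing_dips
        TEST_NAME:
          - test_preprocessing_dips_four_classes_pdf
          - test_preprocessing_dips_count
      - TEST_NAME: test_preprocessing_dl1r_count
        TEST_PATH: test_preprocessing_dl1r

# each training job pulls in the same dependency block via this anchor
.test_train_dependencies: &test_train_dependencies
  dependencies:
    - test_preprocessing_parallel
  needs:
    - test_preprocessing_parallel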