Commit 1fd1739f authored by toschroe

keep Add Var feature

parents 6c37ee8d dc039d11
Pipeline #3367904 failed with stages in 2 minutes and 27 seconds
......@@ -23,7 +23,7 @@ stages:
checking_mr:
stage: check_mr
image: python:3.7-slim
image: python:3.8-slim
before_script:
- pip install --upgrade pip setuptools wheel
- pip install python-gitlab
......@@ -35,7 +35,7 @@ checking_mr:
linter:
stage: linting
image: python:3.7-slim
image: python:3.8-slim
script:
- mkdir coverage_files/
- pip install flake8
......@@ -59,7 +59,7 @@ yaml_linter:
doc_string_check:
stage: linting
image: python:3.7-slim
image: python:3.8-slim
script:
- pip install darglint
- darglint --list-errors
......@@ -69,6 +69,19 @@ doc_string_check:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
allow_failure: True
pylint:
stage: linting
image: python:3.8-slim
before_script:
- pip install pylint==2.12.2
script:
- pylint ./umami
rules:
- if: $CI_COMMIT_BRANCH != ''
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
allow_failure: True
include:
- 'pipelines/.unit_test-gitlab-ci.yaml'
- 'pipelines/.docker-gitlab-ci.yaml'
......
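The checking_mr job above installs python-gitlab to inspect the merge request from within CI. A minimal sketch of that kind of lookup, assuming a token is exposed to the job via an environment variable (GITLAB_TOKEN and the hard-coded project path are illustrative, not taken from the actual check script):

    import os

    import gitlab  # provided by the python-gitlab package installed in before_script

    # Connect to the CERN GitLab instance with a token from the CI environment.
    gl = gitlab.Gitlab("https://gitlab.cern.ch", private_token=os.environ["GITLAB_TOKEN"])

    # Fetch the merge request that triggered the pipeline and print some metadata.
    project = gl.projects.get("atlas-flavor-tagging-tools/algorithms/umami")
    mr = project.mergerequests.get(int(os.environ["CI_MERGE_REQUEST_IID"]))
    print(mr.title, mr.labels)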
......@@ -14,6 +14,7 @@ repos:
language: system
entry: black
types: [python]
args: ["--experimental-string-processing"]
- id: flake8
name: flake8
stages: [commit]
......
FROM UPDATED_IN_GITLAB_CI_JOB
FROM ${BASE_IMAGE}
COPY . /umami
WORKDIR /umami
......
FROM UPDATED_IN_GITLAB_CI_JOB
FROM ${BASE_IMAGE}
## ensure locale is set during build
ENV LANG C.UTF-8
......
......@@ -4,7 +4,7 @@
test_coverage:
stage: coverage_test_stage
image: python:3.7-slim
image: python:3.8-slim
script:
- pip install --upgrade pip setuptools wheel
- pip install pytest==6.2.4
......@@ -46,7 +46,7 @@ coverage_html_report:
test_coverage_post_report:
stage: publish
image: python:3.7-slim
image: python:3.8-slim
needs: ["test_coverage"]
before_script:
- pip install --upgrade pip setuptools wheel
......
......@@ -4,10 +4,21 @@
.image_build_template: &image_build_template
script:
# The script gets overwritten in jobs tagged docker-image-build
- ignore
tags:
- docker-image-build
# Prepare Kaniko configuration file
- echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json
# Build and push the image from the Dockerfile at the root of the project.
- /kaniko/executor --context $CI_PROJECT_DIR
--dockerfile ${DOCKER_FILE}
--build-arg ${BASE}
--destination ${IMAGE_DESTINATION}
# Print the full registry path of the pushed image
- echo "Image pushed successfully to ${IMAGE_DESTINATION}"
image:
# We recommend using the CERN version of the Kaniko image: gitlab-registry.cern.ch/ci-tools/docker-image-builder
name: gitlab-registry.cern.ch/ci-tools/docker-image-builder
entrypoint: [""]
retry: 2
......@@ -15,67 +26,67 @@ build_umamibase_cpu:
<<: *image_build_template
stage: image_build_umamibase
variables:
BASE: 'BASE_IMAGE=tensorflow/tensorflow:$TFTAG'
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: tensorflow/tensorflow:$TFTAG
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest'
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
variables:
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG'
build_umamibase_gpu:
<<: *image_build_template
stage: builds
variables:
BASE: 'BASE_IMAGE=tensorflow/tensorflow:$TFTAG-gpu'
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: tensorflow/tensorflow:$TFTAG-gpu
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-gpu'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-gpu'
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
variables:
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG-gpu'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG-gpu'
build_umamibase_gpu_pytorch:
<<: *image_build_template
stage: builds
variables:
BASE: 'BASE_IMAGE=pytorch/pytorch:$TORCHTAG'
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: pytorch/pytorch:$TORCHTAG
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-pytorch-gpu'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-pytorch-gpu'
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Umami images: use base image as a foundation to speed up build process
build_umami_cpu:
<<: *image_build_template
stage: image_build_umami
variables:
DOCKER_FILE: docker/umami/Dockerfile
FROM: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest'
TO: $CI_REGISTRY_IMAGE:latest
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
variables:
FROM: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG'
TO: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
<<: *image_build_template
stage: image_build_umami
variables:
BASE: 'BASE_IMAGE=${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest'
DOCKER_FILE: docker/umami/Dockerfile
IMAGE_DESTINATION: '${CI_REGISTRY_IMAGE}:latest'
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
variables:
BASE: 'BASE_IMAGE=${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG'
IMAGE_DESTINATION: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
build_umami_gpu:
<<: *image_build_template
stage: image_build_umami
variables:
DOCKER_FILE: docker/umami/Dockerfile
FROM: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-gpu'
TO: $CI_REGISTRY_IMAGE:latest-gpu
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
variables:
FROM: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG-gpu'
TO: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-gpu
<<: *image_build_template
stage: image_build_umami
variables:
BASE: 'BASE_IMAGE=${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-gpu'
DOCKER_FILE: docker/umami/Dockerfile
IMAGE_DESTINATION: '${CI_REGISTRY_IMAGE}:latest-gpu'
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
variables:
BASE: 'BASE_IMAGE=${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG-gpu'
IMAGE_DESTINATION: '$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-gpu'
# ----------------------------------------------------------------------------
......@@ -92,9 +103,9 @@ build_umamibase_cpu_MR:
<<: *image_build_template
stage: image_build_umamibase
variables:
BASE: 'BASE_IMAGE=tensorflow/tensorflow:$TFTAG'
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: tensorflow/tensorflow:$TFTAG
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/temporary_images:${CI_MERGE_REQUEST_IID}-base'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/temporary_images:${CI_MERGE_REQUEST_IID}-base'
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
......@@ -103,9 +114,9 @@ build_umamibase_gpu_MR:
<<: *image_build_template
stage: image_build_umamibase
variables:
BASE: 'BASE_IMAGE=tensorflow/tensorflow:$TFTAG-gpu'
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: tensorflow/tensorflow:$TFTAG-gpu
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/temporary_images:${CI_MERGE_REQUEST_IID}-gpu-base'
IMAGE_DESTINATION: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/temporary_images:${CI_MERGE_REQUEST_IID}-gpu-base'
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
when: manual
......
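The escaped echo in the template above writes a small JSON credentials file for Kaniko. A sketch of the same structure in plain Python, shown only to make the shell quoting easier to read (the environment variables are the standard GitLab CI ones used in the script):

    import json
    import os

    # Same document the CI script writes to /kaniko/.docker/config.json.
    auth_config = {
        "auths": {
            os.environ["CI_REGISTRY"]: {
                "username": os.environ["CI_REGISTRY_USER"],
                "password": os.environ["CI_REGISTRY_PASSWORD"],
            }
        }
    }
    print(json.dumps(auth_config))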
......@@ -148,6 +148,21 @@ test_train_dips:
- test_dips_model/
- coverage_files/
test_train_dips_tfrecords:
<<: *test_template
stage: integration_test_tagger
needs: ["test_preprocessing_dips"]
dependencies:
- test_preprocessing_dips
script:
- pytest --cov=./ --cov-report= ./umami/tests/integration/test_train_dips.py -v -s --junitxml=report.xml -k "test_tfrecords_train_dips"
- cp .coverage ./coverage_files/.coverage.test_train_dips_tfrecords
artifacts:
<<: *artifact_template
paths:
- test_dips_model_tfrecords/
- coverage_files/
test_train_dips_cond_att:
<<: *test_template
stage: integration_test_tagger
......@@ -205,7 +220,7 @@ test_train_umami_tfrecords:
artifacts:
<<: *artifact_template
paths:
- test_umami_model/
- test_umami_model_tfrecords/
- coverage_files/
test_plot_input_vars:
......
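For local debugging, the pytest call in the new test_train_dips_tfrecords job can be reproduced outside CI; a rough equivalent through pytest's Python entry point, with the coverage and JUnit options omitted:

    import pytest

    # Run only the tfrecords DIPS training test, mirroring the -k selection in CI.
    exit_code = pytest.main(
        [
            "./umami/tests/integration/test_train_dips.py",
            "-v",
            "-s",
            "-k",
            "test_tfrecords_train_dips",
        ]
    )
    raise SystemExit(exit_code)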
[tool.black]
line-length = 79
target-version = ['py37']
line-length = 88
target-version = ['py38']
include = '\.pyi?$'
exclude = '''
/(
......
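The pyproject change moves black to 88-character lines and a Python 3.8 target. A small sketch of applying the same settings programmatically, assuming black's FileMode/TargetVersion API as in the 21.x releases in use around this time:

    import black

    source = "x = {  'a':37,'b':42,}\n"

    # Format a snippet with the settings from pyproject.toml:
    # 88-character lines, targeting Python 3.8 syntax.
    mode = black.FileMode(
        line_length=88,
        target_versions={black.TargetVersion.PY38},
    )
    print(black.format_str(source, mode=mode))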
......@@ -21,6 +21,7 @@ partd==1.2.0
Pillow==8.2.0
pre-commit==2.12.1
pydot==1.4.2
pylint==2.12.2
pytest-cov==2.12.0
pytest==6.2.4
ruamel.yaml==0.17.10
......@@ -35,3 +36,4 @@ xarray==0.16.2
xhistogram==0.1.3
yamllint==1.26.2
zarr==2.8.1
python-gitlab
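requirements.txt now pins pylint==2.12.2 and adds python-gitlab (unpinned). One way to check which versions actually land inside a built image is the standard-library metadata module available with the Python 3.8 bump (illustrative only):

    from importlib.metadata import version

    for package in ("pylint", "python-gitlab", "pytest"):
        print(package, version(package))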
......@@ -3,12 +3,18 @@ multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=79
line_length=88
[flake8]
ignore = E203, E266, E501, W503
max-line-length = 79
ignore = E203, E266, W503
max-line-length = 88
select = B,C,E,F,W,T4
[darglint]
ignore=DAR203
\ No newline at end of file
ignore=DAR203
[pylint.FORMAT]
max-line-length = 88
[pylint.'MESSAGES CONTROL']
disable = missing-docstring,invalid-name,unspecified-encoding,wrong-import-order,logging-fstring-interpolation,no-name-in-module,too-many-arguments,too-many-locals,too-many-lines,no-member,too-many-statements,too-many-branches
\ No newline at end of file
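The new [pylint.FORMAT] and [pylint.'MESSAGES CONTROL'] sections are read from setup.cfg when pylint runs. A minimal sketch of triggering the same check from Python instead of the CI command line; the exit keyword and msg_status attribute are assumed to behave as in pylint 2.12:

    from pylint.lint import Run

    # Lint the umami package with the configuration from setup.cfg,
    # matching the `pylint ./umami` call in the CI job.
    results = Run(["--rcfile=setup.cfg", "./umami"], exit=False)
    print("pylint exit status:", results.linter.msg_status)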
......@@ -8,11 +8,16 @@ from umami.tools import yaml_loader
class Configuration(object):
"""This is a global configuration to allow certain settings which are hardcoded so far."""
"""
This is a global configuration to allow certain settings which are
hardcoded so far.
"""
def __init__(self, yaml_config=None):
super(Configuration, self).__init__()
self.yaml_config = f"{pathlib.Path(__file__).parent.absolute()}/../configs/global_config.yaml"
self.yaml_config = (
f"{pathlib.Path(__file__).parent.absolute()}/../configs/global_config.yaml"
)
self.LoadConfigFile()
self.logger = self.SetLoggingLevel()
self.SetTFDebugLevel()
......@@ -37,9 +42,7 @@ class Configuration(object):
self.logger.debug(f"Setting {item} to {self.config[item]}.")
setattr(self, item, self.config[item])
else:
raise KeyError(
f"You need to specify {item} in your" " config file!"
)
raise KeyError(f"You need to specify {item} in your config file!")
def SetMPLPlottingBackend(self):
"""Setting the plotting backend of matplotlib."""
......@@ -53,9 +56,7 @@ class Configuration(object):
def SetTFDebugLevel(self):
"""Setting the Debug level of tensorflow.
For reference see https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error""" # noqa
self.logger.debug(
f"Setting TFDebugLevel to {self.config['TFDebugLevel']}"
)
self.logger.debug(f"Setting TFDebugLevel to {self.config['TFDebugLevel']}")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(self.config["TFDebugLevel"])
def SetLoggingLevel(self):
......@@ -73,7 +74,8 @@ class Configuration(object):
logger.setLevel(log_levels[self.config["DebugLevel"]])
else:
logging.error(
f"The 'DebugLevel' option {self.config['DebugLevel']} set in the global config is not valid."
f"The 'DebugLevel' option {self.config['DebugLevel']} set in"
" the global config is not valid."
)
ch = logging.StreamHandler()
ch.setLevel(log_levels[self.config["DebugLevel"]])
......@@ -95,7 +97,9 @@ class CustomFormatter(logging.Formatter):
red = "\x1b[31;21m"
bold_red = "\x1b[31;1m"
reset = "\x1b[0m"
debugformat = "%(asctime)s - %(levelname)s:%(name)s: %(message)s (%(filename)s:%(lineno)d)"
debugformat = (
"%(asctime)s - %(levelname)s:%(name)s: %(message)s (%(filename)s:%(lineno)d)"
)
format = "%(levelname)s:%(name)s: %(message)s"
FORMATS = {
......
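The reformatted loop above copies each expected option from the parsed YAML onto the Configuration instance and raises a KeyError for anything missing. A stripped-down sketch of that pattern (a standalone toy class, not the actual umami implementation, using yaml.safe_load instead of the project's yaml_loader):

    import yaml


    class MiniConfig:
        """Toy version of the required-option check in Configuration."""

        REQUIRED = ("DebugLevel", "TFDebugLevel")

        def __init__(self, yaml_config):
            with open(yaml_config) as conf_file:
                self.config = yaml.safe_load(conf_file)
            for item in self.REQUIRED:
                if item in self.config:
                    setattr(self, item, self.config[item])
                else:
                    raise KeyError(f"You need to specify {item} in your config file!")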
......@@ -27,9 +27,7 @@ def GetParser():
-------
args: parse_args
"""
parser = argparse.ArgumentParser(
description="Preprocessing command line options."
)
parser = argparse.ArgumentParser(description="Preprocessing command line options.")
parser.add_argument(
"-c",
......@@ -75,8 +73,37 @@ def GetParser():
def EvaluateModel(
args, train_config, preprocess_config, test_file, data_set_name
args: object,
train_config: object,
preprocess_config: object,
test_file: str,
data_set_name: str,
):
"""
Evaluate only the taggers in the files or also the UMAMI tagger.
Parameters
----------
args : object
Loaded argparser.
train_config : object
Loaded train config.
preprocess_config : object
Loaded preprocessing config.
test_file : str
Path to the files which are to be tested. Wildcards are supported.
data_set_name : str
Dataset name for the results files. The results will be saved in
dicts. The key will be this dataset name.
Raises
------
ValueError
If no epoch is given when evaluating UMAMI.
ValueError
If the given tagger argument in train config is not a list.
"""
# Get train parameters
Eval_params = train_config.Eval_parameters_validation
class_labels = train_config.NN_structure["class_labels"]
......@@ -84,8 +111,7 @@ def EvaluateModel(
frac_values_comp = Eval_params["frac_values_comp"]
var_cuts = (
Eval_params["variable_cuts"][f"{data_set_name}"]
if "variable_cuts" in Eval_params
and Eval_params["variable_cuts"] is not None
if "variable_cuts" in Eval_params and Eval_params["variable_cuts"] is not None
else None
)
......@@ -102,7 +128,8 @@ def EvaluateModel(
except AttributeError:
Eval_model_bool = True
# Set epoch to use for evaluation of trained model or dummy value if tagger scores from derivations should be used
# Set epoch to use for evaluation of trained model or dummy value if
# tagger scores from derivations should be used
epoch = args.epoch if Eval_model_bool else 0
# Test if multiple taggers are given or not
......@@ -124,9 +151,7 @@ def EvaluateModel(
# evaluate trained model file (for evaluate_trained_model: True in config)
if Eval_model_bool:
if epoch is None:
raise ValueError(
"You need to give an epoch which is to be evaluated!"
)
raise ValueError("You need to give an epoch which is to be evaluated!")
# Get model file path
model_file = utt.GetModelPath(
......@@ -239,36 +264,64 @@ def EvaluateModel(
if tagger_preds != []
else None,
frac_values_comp=frac_values_comp,
eff_min=0.49
if "eff_min" not in Eval_params
else Eval_params["eff_min"],
eff_max=1.0
if "eff_max" not in Eval_params
else Eval_params["eff_max"],
eff_min=0.49 if "eff_min" not in Eval_params else Eval_params["eff_min"],
eff_max=1.0 if "eff_max" not in Eval_params else Eval_params["eff_max"],
)
df_eff_rej = pd.DataFrame(tagger_rej_dicts)
del tagger_rej_dicts
df_eff_rej.to_hdf(
f"{train_config.model_name}/results/results-rej_per_eff"
f"-{epoch}.h5",
f"{train_config.model_name}/results/results-rej_per_eff-{epoch}.h5",
data_set_name,
)
# Save the number of jets in the test file to the h5 file.
# This is needed to calculate the binomial errors
with h5py.File(
f"{train_config.model_name}/results/"
+ f"results-rej_per_eff-{epoch}.h5",
f"{train_config.model_name}/results/" + f"results-rej_per_eff-{epoch}.h5",
"a",
) as f:
f.attrs["N_test"] = len(jets)
def EvaluateModelDips(
args, train_config, preprocess_config, test_file, data_set_name, tagger
args: object,
train_config: object,
preprocess_config: object,
test_file: str,
data_set_name: str,
tagger: str,
):
"""
Evaluate the DIPS models.
Parameters
----------
args : object
Loaded argparser.
train_config : object
Loaded train config.
preprocess_config : object
Loaded preprocessing config.
test_file : str
Path to the files which are to be tested. Wildcards are supported.
data_set_name : str
Dataset name for the results files. The results will be saved in
dicts. The key will be this dataset name.
tagger : str
Name of the tagger that is to be evaluated. Can either be dips or
dips_cond_att depending which architecture is used.
Raises
------
ValueError
If no epoch is given when evaluating.
ValueError
If the given tagger argument in train config is neither a
list nor a string.
"""
# Check if epochs are set
if args.epoch is None:
raise ValueError("You need to give an epoch which is to be evaluated!")
......@@ -280,8 +333,7 @@ def EvaluateModelDips(
frac_values_comp = Eval_params["frac_values_comp"]
var_cuts = (
Eval_params["variable_cuts"][f"{data_set_name}"]
if "variable_cuts" in Eval_params
and Eval_params["variable_cuts"] is not None
if "variable_cuts" in Eval_params and Eval_params["variable_cuts"] is not None
else None
)
......@@ -304,9 +356,7 @@ def EvaluateModelDips(
)
# Get model file path
model_file = utt.GetModelPath(
model_name=train_config.model_name, epoch=args.epoch
)
model_file = utt.GetModelPath(model_name=train_config.model_name, epoch=args.epoch)
logger.info(f"Evaluating {model_file}")
# Check which test files need to be loaded depending on the DIPS version
......@@ -428,12 +478,8 @@ def EvaluateModelDips(
main_class=main_class,
frac_values={"dips": Eval_params["frac_values"]},
frac_values_comp=frac_values_comp,
eff_min=0.49
if "eff_min" not in Eval_params
else Eval_params["eff_min"],
eff_max=1.0
if "eff_max" not in Eval_params
else Eval_params["eff_max"],
eff_min=0.49 if "eff_min" not in Eval_params else Eval_params["eff_min"],
eff_max=1.0 if "eff_max" not in Eval_params else Eval_params["eff_max"],
)
# Form the dict to a Dataframe and save it
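The eff_min/eff_max defaults above use the `value if key not in dict else dict[key]` idiom; an equivalent, slightly shorter spelling with dict.get, shown purely for comparison and not part of the commit:

    # Same defaults as in the call above, written with dict.get.
    eff_min = Eval_params.get("eff_min", 0.49)
    eff_max = Eval_params.get("eff_max", 1.0)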
......@@ -441,16 +487,14 @@ def EvaluateModelDips(
del tagger_rej_dicts
df_eff_rej.to_hdf(
f"{train_config.model_name}/results/results-rej_per_eff"
f"-{args.epoch}.h5",
f"{train_config.model_name}/results/results-rej_per_eff-{args.epoch}.h5",
data_set_name,
)
# Save the number of jets in the test file to the h5 file.
# This is needed to calculate the binomial errors
with h5py.File(
f"{train_config.model_name}/results/"
+ f"results-rej_per_eff-{args.epoch}.h5",
f"{train_config.model_name}/results/" + f"results-rej_per_eff-{args.epoch}.h5",
"a",
) as f:
f.attrs["N_test"] = len(jets)
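As in the lines above, the rejection-per-efficiency table is written with pandas and the jet count is then attached as an HDF5 attribute for the binomial-error calculation. A self-contained sketch of that save pattern with dummy data (file name, key, and numbers are placeholders):

    import h5py
    import pandas as pd

    df_eff_rej = pd.DataFrame({"eff": [0.6, 0.7], "rej": [120.0, 60.0]})

    # Store the table under a per-dataset key, then record the number of
    # test jets as a file attribute.
    df_eff_rej.to_hdf("results-rej_per_eff-example.h5", "ttbar_r21")
    with h5py.File("results-rej_per_eff-example.h5", "a") as f:
        f.attrs["N_test"] = 100000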
......@@ -481,13 +525,42 @@ def EvaluateModelDips(
def EvaluateModelDL1(
args,
train_config,
preprocess_config,
test_file,
data_set_name,
test_file_entry,
args: object,
train_config: object,
preprocess_config: object,
test_file: str,
data_set_name: str,
test_file_entry: str,
):
"""
Evaluate the various DL1* models.
Parameters
----------
args : object
Loaded argparser.
train_config : object
Loaded train config.
preprocess_config : object
Loaded preprocessing config.
test_file : str
Path to the files which are to be tested. Wildcards are supported.
data_set_name : str