Commit 9a9fc198 authored by Alexander Froch's avatar Alexander Froch
Browse files

Merge branch mguth-update-variable-sv with refs/heads/master into refs/merge-requests/451/train

parents 5e5f9b94 dc921be2
Pipeline #3679780 passed with stages
in 16 minutes and 45 seconds
parameters:
file_path: ./
.ntuple_path: &ntuple_path <path_to_ntuples>
preparation:
# Number of jets loaded per batch from the files for preparation.
batchsize: 5000
# Path to the .h5 ntuples from the h5 dumper.
ntuples:
ttbar:
path: *ntuple_path
file_pattern: user.alfroch.410470.btagTraining.e6337_s3681_r13144_p4931.EMPFlowAll.2022-02-07-T174158_output.h5/user.alfroch.28040424._000593.output.h5
zprime:
path: *ntuple_path
file_pattern: user.alfroch.800030.btagTraining.e7954_s3681_r13144_p4931.EMPFlowAll.2022-02-07-T174158_output.h5/user.alfroch.28040426._000179.output.h5
ttbar_test:
path: *ntuple_path
file_pattern: user.alfroch.410470.btagTraining.e6337_s3681_r13144_p4931.EMPFlowAll.2022-02-07-T174158_output.h5/user.alfroch.28040424._000572.output.h5
zprime_test:
path: *ntuple_path
file_pattern: user.alfroch.800030.btagTraining.e7954_s3681_r13144_p4931.EMPFlowAll.2022-02-07-T174158_output.h5/user.alfroch.28040426._000039.output.h5
samples:
ttbar_inclusive:
type: ttbar
category: inclusive
n_jets: 85000
f_output:
path: ci/
file: ci_ttbar_basefile.h5
zprime_inclusive:
type: zprime
category: inclusive
n_jets: 50000
f_output:
path: ci/
file: ci_zpext_basefile.h5
ttbar_inclusive_testing:
type: ttbar_test
category: inclusive
cuts:
- eventNumber:
operator: mod_6_==
condition: 5
n_jets: 15000
f_output:
path: ci/
file: ci_ttbar_testing.h5
zprime_inclusive_testing:
type: zprime_test
category: inclusive
cuts:
- eventNumber:
operator: mod_6_==
condition: 5
n_jets: 10000
f_output:
path: ci/
file: ci_zpext_testing.h5
sampling:
method: Null
options:
save_tracks: True
tracks_names: ["tracks", "tracks_loose"]
outfile_name: null
plot_name: null
var_file: null
dict_file: null
compression: null
precision: float16
convert_to_tfrecord:
chunk_size: 5000
N_add_vars: null
......@@ -4,10 +4,12 @@
### Latest
- Adding a check for `replaceLineInFile` if leading spaces stay same, if not a warning is raised [!451](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/451)
- Allowing that no cuts are provided for samples in the preprocessing step [!451](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/451)
- Updating jet training variable from `SV1_significance3d` to `SV1_correctSignificance3d` for r22 [!451](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/451)
- Restructuring gitlab CI file structure and adding MR/issue templates [!463](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/463)
- Removing spectator variables from variable configs and fixing `exclude` option in training [!461](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/461)
- Adding `atlasify` to requirements [!458](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/458)
- Supporting binariser for 2 class labels to have still one hot encoding [!409](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/409)
- Variable plots for preprocessing stages added [!440](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/440)
- Update TFRecord reader/writer + Adding support for CADS and Umami Cond Att [!444](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/444)
......
......@@ -87,3 +87,22 @@ There is a [global configuration](https://gitlab.cern.ch/atlas-flavor-tagging-to
| `DebugLevel` | Defines the debug level. Possible values: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL` |
| `TFDebugLevel` | Defines the debug level of tensorflow, it takes integer values [0,1,2,3], where 0 prints all messages. |
## Updating CI files
In certain cases it is necessary to update the CI files located e.g. in `/eos/user/u/umamibot/www/ci/preprocessing`.
To prepare these preprocessing files you can use the provided config in `.gitlab/workflow/ci-preprocessing.yaml` and create the files then via
```bash
preprocessing.py -c .gitlab/workflow/ci-preprocessing.yaml --prepare
```
This will give you the following 4 files
- `ci_ttbar_basefile.h5`
- `ci_ttbar_testing.h5`
- `ci_zpext_basefile.h5`
- `ci_zpext_testing.h5`
To copy them to the `eos` area, please ask one of the umami responsibles.
\ No newline at end of file
......@@ -37,7 +37,7 @@ train_variables:
- SV1_deltaR
- SV1_Lxy
- SV1_L3d
- SV1_significance3d
- SV1_correctSignificance3d # previously SV1_significance3d
IP2D:
- IP2D_isDefaults
- IP2D_bu
......
......@@ -33,7 +33,7 @@ train_variables:
- SV1_deltaR
- SV1_Lxy
- SV1_L3d
- SV1_significance3d
- SV1_correctSignificance3d # previously SV1_significance3d
IP2D:
- IP2D_isDefaults
- IP2D_bu
......
......@@ -34,7 +34,7 @@ train_variables:
- SV1_deltaR
- SV1_Lxy
- SV1_L3d
- SV1_significance3d
- SV1_correctSignificance3d # previously SV1_significance3d
IP2D:
- IP2D_isDefaults
- IP2D_bu
......
......@@ -6,8 +6,6 @@ import pathlib
import matplotlib
import yaml
from umami.tools import yaml_loader
class Configuration:
"""
......@@ -29,7 +27,7 @@ class Configuration:
def LoadConfigFile(self):
"""Load config file from disk."""
with open(self.yaml_config, "r") as conf:
self.config = yaml.load(conf, Loader=yaml_loader)
self.config = yaml.load(conf, Loader=yaml.FullLoader)
def GetConfiguration(self):
"""Assigne configuration from file to class variables.
......
......@@ -5,6 +5,8 @@ from functools import reduce
import numpy as np
from umami.configuration import logger
def GetSampleCuts(jets: np.ndarray, cuts: list) -> np.ndarray:
"""
......@@ -42,6 +44,8 @@ def GetSampleCuts(jets: np.ndarray, cuts: list) -> np.ndarray:
If unsupported operator is provided.
"""
if cuts is None:
return []
# define operator dict to be able to call them via string from config
inverted_ops = {
"==": operator.ne,
......@@ -69,6 +73,7 @@ def GetSampleCuts(jets: np.ndarray, cuts: list) -> np.ndarray:
# expect a dictionary with only one entry
cut = list(cut_entry.keys())
logger.debug(f"Cuts: {cuts}")
if len(cut) != 1:
raise KeyError(
"The cut object is expected to be a dictionary with one entry."
......@@ -120,6 +125,7 @@ def GetSampleCuts(jets: np.ndarray, cuts: list) -> np.ndarray:
indices_to_remove = np.where(reduce(operator.or_, cut_rejections, False))[0]
del cut_rejections
logger.debug(f"Cuts remove {len(indices_to_remove)} jets of a total of {len(jets)}")
return indices_to_remove
......
......@@ -752,8 +752,7 @@ def plot_pt_dependence(
disc_cut = kwargs["Disc_Cut_Value"]
kwargs.pop("Disc_Cut_Value")
if "Fixed_WP_Bin" in kwargs:
if fixed_eff_bin is None:
fixed_eff_bin = kwargs["Fixed_WP_Bin"]
fixed_eff_bin = kwargs["Fixed_WP_Bin"]
kwargs.pop("Fixed_WP_Bin")
if "Grid" in kwargs:
grid = kwargs["Grid"]
......
......@@ -7,7 +7,7 @@ import warnings
import yaml
from umami.configuration import logger
from umami.tools import YAML, yaml_loader
from umami.tools import YAML
class Configuration:
......@@ -77,7 +77,7 @@ class Configuration:
os.path.dirname(__file__), self.yaml_default_config
)
with open(self.yaml_default_config, "r") as conf:
self.default_config = yaml.load(conf, Loader=yaml_loader)
self.default_config = self.YAML.load(conf)
logger.info(f"Using config file {self.yaml_config}")
with open(self.yaml_config, "r") as conf:
......
......@@ -9,8 +9,10 @@ import tempfile
import unittest
from shutil import copyfile
import pytest
from umami.configuration import logger, set_log_level
from umami.tools import replaceLineInFile
from umami.tools import compare_leading_spaces, replaceLineInFile
from umami.train_tools.Configuration import Configuration
set_log_level(logger, "DEBUG")
......@@ -84,3 +86,26 @@ class replaceLineInFile_TestCase(unittest.TestCase):
"Defintly_not_in_the_file:",
"model_name: Unittest_Testname",
)
@pytest.mark.parametrize(
    "strings, expected_result",
    [
        # identical strings without indentation -> no difference
        (("test134", "test789"), 0),
        # identical indentation on both sides -> no difference
        (("  test134", "  test789"), 0),
        # comparison string indented 2 spaces more -> negative difference
        ((" test134", "   test789"), -2),
        # reference string indented 2 spaces more -> positive difference
        (("   test134", " test789"), 2),
    ],
)
def test_compare_leading_spaces(strings: tuple, expected_result: int) -> None:
    """Test different scenarios for `compare_leading_spaces` function.

    Parameters
    ----------
    strings : tuple
        (reference, comparison) string pair to test
    expected_result : int
        expected difference in leading spaces
    """
    # parameter renamed from `input` to `strings` to avoid shadowing the
    # Python builtin `input`
    result = compare_leading_spaces(strings[0], strings[1])
    assert result == expected_result
# flake8: noqa
# pylint: skip-file
from umami.tools.PyATLASstyle.PyATLASstyle import applyATLASstyle, makeATLAStag
from umami.tools.tools import replaceLineInFile, yaml_loader, atoi, natural_keys
from umami.tools.tools import (
atoi,
compare_leading_spaces,
natural_keys,
replaceLineInFile,
yaml_loader,
)
from umami.tools.yaml_tools import YAML
......@@ -3,6 +3,8 @@ import re
import yaml
from umami.configuration.Configuration import logger
# adding a custom yaml loader in order to be able to have numbers with
# scientific notation
# TODO: This should be replaced everywhere with the new YAML loader which
......@@ -24,7 +26,36 @@ yaml_loader.add_implicit_resolver(
)
def replaceLineInFile(file, key, newLine, only_first=False):
def compare_leading_spaces(ref: str, comp: str):
    """Compares if leading spaces of 2 strings are the same.

    Parameters
    ----------
    ref : str
        reference string
    comp : str
        comparison string

    Returns
    -------
    int
        difference in leading spaces of ref and comp string
    """
    # number of leading spaces = total length minus length after lstrip
    n_ref = len(ref) - len(ref.lstrip())
    n_comp = len(comp) - len(comp.lstrip())
    logger.debug(f"Leading spaces in {ref}: {n_ref}")
    logger.debug(f"Leading spaces in {comp}: {n_comp}")

    delta = n_ref - n_comp
    # a non-zero difference is worth a warning, but not an error
    if delta != 0:
        logger.warning(
            f"Your strings `{ref}` and `{comp}` have a different amount of leading "
            f"spaces ({delta})."
        )
    return delta
def replaceLineInFile(file, key, new_line, only_first=False):
    """Replace line in file.

    Every line containing `key` is replaced by `new_line` (only the first
    match when `only_first` is True). Leading spaces of the replaced line
    and the replacement are compared, emitting a warning if they differ.

    Parameters
    ----------
    file : str
        file name
    key : str
        key which triggers the replacement of line
    new_line : str
        content of line replacement
    only_first : bool, optional
        if True only first line in which key found is replaced, by default False

    Raises
    ------
    AttributeError
        if no line matching `key` could be found in the file
    """
    new_content = []
    replaced = False

    with open(file, "r") as in_file:
        for line in in_file:
            # replace when the key matches, unless we are restricted to the
            # first occurrence and already replaced one
            if key in line and (not only_first or not replaced):
                compare_leading_spaces(line, new_line)
                line = new_line + "\n"
                replaced = True
            new_content.append(line)

    if not replaced:
        raise AttributeError(f'No line could be found matching "{key}"')

    with open(file, "w") as out_file:
        out_file.write("".join(new_content))
def atoi(text):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment