Commit e41ebc64 authored by Alexander Froch's avatar Alexander Froch
Browse files

Merge branch 112-flexible-validation-test-file-definition with...

Merge branch 112-flexible-validation-test-file-definition with refs/heads/master into refs/merge-requests/339/train
parents c2cc1d7a 51804e28
Pipeline #3497897 passed with stages
in 9 minutes and 58 seconds
......@@ -27,6 +27,4 @@ python_install/
Preprocessing-parameters-*.yaml
# ignoring preprocessing integration test folders
preprocessing_*/
test_train_*/
# ignoring any test directory
test-*/
test_*_model*/
......@@ -41,30 +41,48 @@ model_file:
# Add training file
train_file: <path>/<to>/<train>/<samples>/train_file.h5
# Add validation files
# ttbar val
validation_file: <path>/<to>/<validation>/<samples>/ttbar_r21_validation_file.h5
# Defining templates for the variable cuts
.variable_cuts_ttbar: &variable_cuts_ttbar
variable_cuts:
- pt_btagJes:
operator: "<="
condition: 2.5e5
.variable_cuts_zpext: &variable_cuts_zpext
variable_cuts:
- pt_btagJes:
operator: ">"
condition: 2.5e5
# zprime val
add_validation_file: <path>/<to>/<validation>/<samples>/zpext_r21_validation_file.h5
ttbar_test_files:
# Add validation files
validation_files:
ttbar_r21_val:
path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_0.h5
label: "$t\\bar{t}$ Release 21"
<<: *variable_cuts_ttbar
zprime_r21_val:
path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_0.h5
label: "$Z'$ Release 21"
<<: *variable_cuts_zpext
test_files:
ttbar_r21:
Path: <path>/<to>/<preprocessed>/<samples>/ttbar_r21_test_file.h5
data_set_name: "ttbar_r21"
path: <path>/<to>/<preprocessed>/<samples>/ttbar_r21_test_file.h5
<<: *variable_cuts_ttbar
ttbar_r22:
Path: <path>/<to>/<preprocessed>/<samples>/ttbar_r22_test_file.h5
data_set_name: "ttbar_r22"
path: <path>/<to>/<preprocessed>/<samples>/ttbar_r22_test_file.h5
<<: *variable_cuts_ttbar
zpext_test_files:
zpext_r21:
Path: <path>/<to>/<preprocessed>/<samples>/zpext_r21_test_file.h5
data_set_name: "zpext_r21"
path: <path>/<to>/<preprocessed>/<samples>/zpext_r21_test_file.h5
<<: *variable_cuts_zpext
zpext_r22:
Path: <path>/<to>/<preprocessed>/<samples>/zpext_r22_test_file.h5
data_set_name: "zpext_r22"
path: <path>/<to>/<preprocessed>/<samples>/zpext_r22_test_file.h5
<<: *variable_cuts_zpext
# Path to Variable dict used in preprocessing
var_dict: <path>/<to>/<variables>/DL1r_Variables.yaml
......@@ -106,6 +124,9 @@ NN_structure:
# Activations of the layers. Starting with first dense layer.
activations: ["relu", "relu", "relu", "relu", "relu", "relu", "relu", "relu"]
# Variables to repeat in the last layer (example)
repeat_end: ["pt_btagJes", "absEta_btagJes"]
# Options for the Learning Rate reducer
LRR: True
......@@ -116,7 +137,7 @@ NN_structure:
# Plotting settings for training metrics plots
Validation_metrics_settings:
# Define which taggers should also be plotted
taggers_from_file: ["RNNIP", "DL1r"]
taggers_from_file: ["rnnip", "DL1r"]
# Label for the freshly trained tagger
tagger_label: "DL1r"
......@@ -158,43 +179,39 @@ Eval_parameters_validation:
"ujets": 0.982,
}
# Cuts which are applied to the different datasets used for evaluation
variable_cuts:
ttbar_r21:
- pt_btagJes:
operator: "<="
condition: 250000
ttbar_r22:
- pt_btagJes:
operator: "<="
condition: 250000
zpext_r21:
- pt_btagJes:
operator: ">"
condition: 250000
zpext_r22:
- pt_btagJes:
operator: ">"
condition: 250000
validation_file:
- pt_btagJes:
operator: "<="
condition: 250000
add_validation_file:
- pt_btagJes:
operator: ">"
condition: 250000
# A list to add available variables to the evaluation files
add_variables_eval: ["actualInteractionsPerCrossing"]
# Working point used in the evaluation
WP: 0.77
# Minimal efficiency considered in ROC computation
eff_min: 0.49
# some properties for the feature importance explanation with SHAPley
shapley:
# Over how many full sets of features it should calculate over.
# Corresponds to the dots in the beeswarm plot.
# 200 takes like 10-15 min for DL1r on a 32 core-cpu
feature_sets: 200
# defines which of the model outputs (flavor) you want to explain
# [tau,b,c,u] := [3, 2, 1, 0]
model_output: 2
# You can also choose if you want to plot the magnitude of feature
# importance for all output nodes (flavors) in another plot. This
# will give you a bar plot of the mean SHAP value magnitudes.
bool_all_flavor_plot: False
# as this takes much longer you can average the feature_sets to a
# smaller set, 50 is a good choice for DL1r
averaged_sets: 50
# [11,11] works well for dl1r
plot_size: [11, 11]
```
It contains the information about the neural network architecture and the training as well as about the files for training, validation and testing. Also evaluation parameters are given for the training evaluation which is performed by the [plotting_epoch_performance.py](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/plotting_epoch_performance.py) script.
......@@ -207,10 +224,10 @@ The different options are briefly explained here:
| `preprocess_config` | String | Necessary | Path to the `preprocess_config` which was used to produce the training samples. |
| `model_file` | String | Optional | If you already have a model and want to continue the training of this model, you can give the path to this model here. This model will be loaded and used instead of init a new one. |
| `train_file` | String | Necessary | Path to the training sample. This is given by the `preprocessing` step of Umami |
| `validation_file` | String | Necessary | Path to the validation sample (ttbar). This is given by the `preprocessing` step of Umami |
| `add_validation_file` | String | Necessary | Path to the validation sample (zpext). This is given by the `preprocessing` step of Umami |
| `ttbar_test_files` | Dict | Optional | Here you can define different ttbar test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is irrelevant while the `Path` and `data_set_name` are important. The `data_set_name` needs to be unique. It's the identifier/name of the dataset in the evaluation file which is used for plotting. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `zpext_test_files` | Dict | Optional | Here you can define different zpext test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is irrelevant while the `Path` and `data_set_name` are important. The `data_set_name` needs to be unique. It's the identifier/name of the dataset in the evaluation file which is used for plotting. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `validation_files` | Dict | Optional | Here you can define different validation samples that are used in the training and the `plotting_epoch_performance.py` script. Those validation samples need to be defined in a dict structure shown in the example. The name of the dict entry is relevant and is the unique identifier for this sample (DO NOT USE IT MULTIPLE TIMES). `path` gives the path to the file. |
| `test_files` | Dict | Optional | Here you can define different test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is relevant and is the unique identifier in the results file which is produced by the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). `path` gives the path to the file. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `path` | String | Necessary | Path to the validation/test file which is to be used. Using wildcards is possible. |
| `variable_cuts` | Dict | Optional | Dict of cuts which are applied when loading the different test files. Only jet variables can be cut on. These are in this example defined as templates for the different sample types. |
| `var_dict` | String | Necessary | Path to the variable dict used in the `preprocess_config` to produce the train sample. |
| `exclude` | List | Necessary | List of variables that are excluded from training. Only compatible with DL1r training. To include all, just give an empty list. |
| `NN_structure` | None | Necessary | A dict where all important information for the training are defined. |
......@@ -247,7 +264,6 @@ The different options are briefly explained here:
| `tagger` | List | Necessary | List of taggers used for comparison. This needs to be a list of string or a single string. The name of the taggers must be same as in the evaluation file. For example, if the DL1d probabilities in the test samples are called `DL1dLoose20210607_pb`, the name you need to add to the list is `DL1dLoose20210607`. |
| `frac_values_comp` | Dict | Necessary | Dict with the fraction values for the comparison taggers. For all flavour (except the main flavour), you need to add values here which add up to one. |
| `frac_values` | Dict | Necessary | Dict with the fraction values for the freshly trained tagger. For all flavour (except the main flavour), you need to add values here which add up to one. |
| `variable_cuts` | Dict | Necessary | Dict of cuts which are applied when loading the different test files. Only jet variables can be cut on. |
| `WP` | Float | Necessary | Working point which is used in the validation and evaluation. |
| `eff_min` | Float | Optional | Minimal main class efficiency considered for ROC. |
......
......@@ -34,30 +34,48 @@ model_file:
# Add training file
train_file: <path>/<to>/<train>/<samples>/train_file.h5
# Add validation files
# ttbar val
validation_file: <path>/<to>/<validation>/<samples>/ttbar_r21_validation_file.h5
# Defining templates for the variable cuts
.variable_cuts_ttbar: &variable_cuts_ttbar
variable_cuts:
- pt_btagJes:
operator: "<="
condition: 2.5e5
.variable_cuts_zpext: &variable_cuts_zpext
variable_cuts:
- pt_btagJes:
operator: ">"
condition: 2.5e5
# zprime val
add_validation_file: <path>/<to>/<validation>/<samples>/zpext_r21_validation_file.h5
ttbar_test_files:
# Add validation files
validation_files:
ttbar_r21_val:
path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_0.h5
label: "$t\\bar{t}$ Release 21"
<<: *variable_cuts_ttbar
zprime_r21_val:
path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_0.h5
label: "$Z'$ Release 21"
<<: *variable_cuts_zpext
test_files:
ttbar_r21:
Path: <path>/<to>/<preprocessed>/<samples>/ttbar_r21_test_file.h5
data_set_name: "ttbar_r21"
path: <path>/<to>/<preprocessed>/<samples>/ttbar_r21_test_file.h5
<<: *variable_cuts_ttbar
ttbar_r22:
Path: <path>/<to>/<preprocessed>/<samples>/ttbar_r22_test_file.h5
data_set_name: "ttbar_r22"
path: <path>/<to>/<preprocessed>/<samples>/ttbar_r22_test_file.h5
<<: *variable_cuts_ttbar
zpext_test_files:
zpext_r21:
Path: <path>/<to>/<preprocessed>/<samples>/zpext_r21_test_file.h5
data_set_name: "zpext_r21"
path: <path>/<to>/<preprocessed>/<samples>/zpext_r21_test_file.h5
<<: *variable_cuts_zpext
zpext_r22:
Path: <path>/<to>/<preprocessed>/<samples>/zpext_r22_test_file.h5
data_set_name: "zpext_r22"
path: <path>/<to>/<preprocessed>/<samples>/zpext_r22_test_file.h5
<<: *variable_cuts_zpext
# Path to Variable dict used in preprocessing
var_dict: <path>/<to>/<variables>/Dips_Variables.yaml
......@@ -100,6 +118,9 @@ NN_structure:
# Options for the Learning Rate reducer
LRR: True
# Option if you want to use sample weights for training
use_sample_weights: False
# Plotting settings for training metrics plots
Validation_metrics_settings:
# Define which taggers should also be plotted
......@@ -108,7 +129,7 @@ Validation_metrics_settings:
# Label for the freshly trained tagger
tagger_label: "DIPS"
# Define which freshly trained taggers should be plotted
# Define which freshly trained taggers should be plotted
trained_taggers:
dipsReference:
path: "dips_Loose/validation_WP0p77_300000jets_Dict.json"
......@@ -134,44 +155,14 @@ Eval_parameters_validation:
tagger: ["rnnip", "DL1r"]
# Define fc values for the taggers
frac_values_comp: {
"rnnip": {
"cjets": 0.08,
"ujets": 0.92,
},
"DL1r": {
"cjets": 0.018,
"ujets": 0.982,
},
}
frac_values_comp:
{
"rnnip": {"cjets": 0.08, "ujets": 0.92},
"DL1r": {"cjets": 0.018, "ujets": 0.982},
}
# Charm fraction value used for evaluation of the trained model
frac_values: {
"cjets": 0.018,
"ujets": 0.982,
}
# Cuts which are applied to the different datasets used for evaluation
variable_cuts:
ttbar_r21:
- pt_btagJes:
operator: "<="
condition: 250000
ttbar_r22:
- pt_btagJes:
operator: "<="
condition: 250000
zpext_r21:
- pt_btagJes:
operator: ">"
condition: 250000
zpext_r22:
- pt_btagJes:
operator: ">"
condition: 250000
frac_values: {"cjets": 0.018, "ujets": 0.982}
# Working point used in the evaluation
WP: 0.77
......@@ -190,10 +181,10 @@ The different options are briefly explained here:
| `preprocess_config` | String | Necessary | Path to the `preprocess_config` which was used to produce the training samples. |
| `model_file` | String | Optional | If you already have a model and want to continue the training of this model, you can give the path to this model here. This model will be loaded and used instead of init a new one. |
| `train_file` | String | Necessary | Path to the training sample. This is given by the `preprocessing` step of Umami |
| `validation_file` | String | Necessary | Path to the validation sample (ttbar). This is given by the `preprocessing` step of Umami |
| `add_validation_file` | String | Necessary | Path to the validation sample (zpext). This is given by the `preprocessing` step of Umami |
| `ttbar_test_files` | Dict | Optional | Here you can define different ttbar test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is irrelevant while the `Path` and `data_set_name` are important. The `data_set_name` needs to be unique. It's the identifier/name of the dataset in the evaluation file which is used for plotting. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `zpext_test_files` | Dict | Optional | Here you can define different zpext test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is irrelevant while the `Path` and `data_set_name` are important. The `data_set_name` needs to be unique. It's the identifier/name of the dataset in the evaluation file which is used for plotting. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `validation_files` | Dict | Optional | Here you can define different validation samples that are used in the training and the `plotting_epoch_performance.py` script. Those validation samples need to be defined in a dict structure shown in the example. The name of the dict entry is relevant and is the unique identifier for this sample (DO NOT USE IT MULTIPLE TIMES). `path` gives the path to the file. |
| `test_files` | Dict | Optional | Here you can define different test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is relevant and is the unique identifier in the results file which is produced by the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). `path` gives the path to the file. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `path` | String | Necessary | Path to the validation/test file which is to be used. Using wildcards is possible. |
| `variable_cuts` | Dict | Optional | Dict of cuts which are applied when loading the different test files. Only jet variables can be cut on. These are in this example defined as templates for the different sample types. |
| `var_dict` | String | Necessary | Path to the variable dict used in the `preprocess_config` to produce the train sample. |
| `exclude` | List | Necessary | List of variables that are excluded from training. Only compatible with DL1r training. To include all, just give an empty list. |
|`tracks_name`| String| Necessary* | Name of the tracks data-set to use for training and evaluation, default is "tracks". <br />* ***This option is necessary when using tracks, but, when working with old preprocessed files (before January 2022) this option has to be removed from the config file to ensure compatibility*** |
......@@ -231,7 +222,6 @@ The different options are briefly explained here:
| `tagger` | List | Necessary | List of taggers used for comparison. This needs to be a list of string or a single string. The name of the taggers must be same as in the evaluation file. For example, if the DL1d probabilities in the test samples are called `DL1dLoose20210607_pb`, the name you need to add to the list is `DL1dLoose20210607`. |
| `frac_values_comp` | Dict | Necessary | Dict with the fraction values for the comparison taggers. For all flavour (except the main flavour), you need to add values here which add up to one. |
| `frac_values` | Dict | Necessary | Dict with the fraction values for the freshly trained tagger. For all flavour (except the main flavour), you need to add values here which add up to one. |
| `variable_cuts` | Dict | Necessary | Dict of cuts which are applied when loading the different test files. Only jet variables can be cut on. |
| `WP` | Float | Necessary | Working point which is used in the validation and evaluation. |
| `Calculate_Saliency` | Bool | Optional | Decide, if the saliency maps are calculated or not. This takes a lot of time and resources! |
......
......@@ -12,23 +12,34 @@ model_name: Eval_results
# Set the option to evaluate a freshly trained model to False
evaluate_trained_model: False
ttbar_test_files:
test_files:
ttbar_r21:
Path: <path>/<to>/<preprocessed>/<samples>/ttbar_r21_test_file.h5
data_set_name: "ttbar_r21"
path: <path>/<to>/<preprocessed>/<samples>/ttbar_r21_test_file.h5
variable_cuts:
- pt_btagJes:
operator: "<="
condition: 250000
ttbar_r22:
Path: <path>/<to>/<preprocessed>/<samples>/ttbar_r22_test_file.h5
data_set_name: "ttbar_r22"
path: <path>/<to>/<preprocessed>/<samples>/ttbar_r22_test_file.h5
variable_cuts:
- pt_btagJes:
operator: "<="
condition: 250000
zpext_test_files:
zpext_r21:
Path: <path>/<to>/<preprocessed>/<samples>/zpext_r21_test_file.h5
data_set_name: "zpext_r21"
path: <path>/<to>/<preprocessed>/<samples>/zpext_r21_test_file.h5
variable_cuts:
- pt_btagJes:
operator: ">"
condition: 250000
zpext_r22:
Path: <path>/<to>/<preprocessed>/<samples>/zpext_r22_test_file.h5
data_set_name: "zpext_r22"
path: <path>/<to>/<preprocessed>/<samples>/zpext_r22_test_file.h5
variable_cuts:
- pt_btagJes:
operator: ">"
condition: 250000
# Values for the neural network
NN_structure:
......@@ -62,28 +73,6 @@ Eval_parameters_validation:
# Charm fraction value used for evaluation of the trained model
frac_values: {"cjets": 0.018, "ujets": 0.982}
# Cuts which are applied to the different datasets used for evaluation
variable_cuts:
ttbar_r21:
- pt_btagJes:
operator: "<="
condition: 250000
ttbar_r22:
- pt_btagJes:
operator: "<="
condition: 250000
zpext_r21:
- pt_btagJes:
operator: ">"
condition: 250000
zpext_r22:
- pt_btagJes:
operator: ">"
condition: 250000
# Working point used in the evaluation
WP: 0.77
```
......@@ -92,8 +81,7 @@ Eval_parameters_validation:
|---------|-----------|--------------------|-------------|
| `model_name` | String | Necessary | Name of the model which is to be trained. Also the foldername where everything of the model will be saved. |
| `evaluate_trained_model` | Bool | Necessary | Needs to be `False` here. Otherwise the script tries to load the freshly trained model. |
| `ttbar_test_files` | Dict | Optional | Here you can define different ttbar test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is irrelevant while the `Path` and `data_set_name` are important. The `data_set_name` needs to be unique. It's the identifier/name of the dataset in the evaluation file which is used for plotting. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `zpext_test_files` | Dict | Optional | Here you can define different zpext test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is irrelevant while the `Path` and `data_set_name` are important. The `data_set_name` needs to be unique. It's the identifier/name of the dataset in the evaluation file which is used for plotting. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `test_files` | Dict | Optional | Here you can define different test samples that are used in the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). Those test samples need to be defined in a dict structure shown in the example. The name of the dict entry is relevant and is the unique identifier in the results file which is produced by the [`evaluate_model.py`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/blob/master/umami/evaluate_model.py). `path` gives the path to the file. For test samples, all samples from the training-dataset-dumper can be used without preprocessing although the preprocessing of Umami produces test samples to ensure orthogonality of the jets with respect to the train sample. |
| `NN_structure` | None | Necessary | A dict where all important information for the training are defined. |
| `class_labels` | List | Necessary | List of flavours used in training. NEEDS TO BE THE SAME AS IN THE `preprocess_config`. Even the ordering needs to be the same! |
| `main_class` | String | Necessary | Main class which is to be tagged. Needs to be in `class_labels`. |
......@@ -102,7 +90,6 @@ Eval_parameters_validation:
| `tagger` | List | Necessary | List of taggers used for comparison. This needs to be a list of string or a single string. The name of the taggers must be same as in the evaluation file. For example, if the DL1d probabilities in the test samples are called `DL1dLoose20210607_pb`, the name you need to add to the list is `DL1dLoose20210607`. |
| `frac_values_comp` | Dict | Necessary | Dict with the fraction values for the comparison taggers. For all flavour (except the main flavour), you need to add values here which add up to one. |
| `frac_values` | Dict | Necessary | Dict with the fraction values for the freshly trained tagger. For all flavour (except the main flavour), you need to add values here which add up to one. |
| `variable_cuts` | Dict | Necessary | Dict of cuts which are applied when loading the different test files. Only jet variables can be cut on. |
| `WP` | Float | Necessary | Working point that is used for evaluation. |
To run the evaluation, you can now execute the following command in the `umami/umami` folder where the `evaluate_model.py` is:
......
......@@ -9,30 +9,47 @@ model_file:
# Add training file
train_file: <path_palce_holder>/PFlow-hybrid-preprocessed_shuffled.h5
# Add validation files
# ttbar val
validation_file: <path_palce_holder>/hybrids/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_0.h5
# Defining templates for the variable cuts
.variable_cuts_ttbar: &variable_cuts_ttbar
variable_cuts:
- pt_btagJes:
operator: "<="
condition: 2.5e5
# zprime val
add_validation_file: <path_palce_holder>/hybrids/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_0.h5
.variable_cuts_zpext: &variable_cuts_zpext
variable_cuts:
- pt_btagJes:
operator: ">"
condition: 2.5e5
ttbar_test_files:
# Add validation files
validation_files:
ttbar_r21_val:
path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_0.h5
label: "$t\\bar{t}$ Release 21"
<<: *variable_cuts_ttbar
zprime_r21_val:
path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_0.h5
label: "$Z'$ Release 21"
<<: *variable_cuts_zpext
test_files:
ttbar_r21:
Path: <path_palce_holder>/hybrids/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_1.h5
data_set_name: "ttbar_r21"
path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_ttbar
ttbar_r22:
Path: <path_palce_holder>/hybrids_r22/MC16d_hybrid-r22_odd_100_PFlow-no_pTcuts-file_1.h5
data_set_name: "ttbar_r22"
path: <path_palce_holder>/MC16d_hybrid-r22_odd_100_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_ttbar
zpext_test_files:
zpext_r21:
Path: <path_palce_holder>/hybrids/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext_r21"
path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_zpext
zpext_r22:
Path: <path_palce_holder>/hybrids_r22/MC16d_hybrid-r22-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext_r22"
path: <path_palce_holder>/MC16d_hybrid-r22-ext_odd_0_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_zpext
# Path to Variable dict used in preprocessing
var_dict: umami/configs/DL1r_Variables.yaml
......@@ -125,38 +142,6 @@ Eval_parameters_validation:
"ujets": 0.982,
}
# Cuts which are applied to the different datasets used for evaluation
variable_cuts:
validation_file:
- pt_btagJes:
operator: "<="
condition: 250000
add_validation_file:
- pt_btagJes:
operator: ">"
condition: 250000
ttbar_r21:
- pt_btagJes:
operator: "<="
condition: 250000
ttbar_r22:
- pt_btagJes:
operator: "<="
condition: 250000
zpext_r21:
- pt_btagJes:
operator: ">"
condition: 250000
zpext_r22:
- pt_btagJes:
operator: ">"
condition: 250000
# A list to add available variables to the evaluation files
add_variables_eval: ["actualInteractionsPerCrossing"]
......
......@@ -9,30 +9,47 @@ model_file:
# Add training file
train_file: <path_palce_holder>/PFlow-hybrid-preprocessed_shuffled.h5
# Add validation files
# ttbar val
validation_file: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_0.h5
# Defining templates for the variable cuts
.variable_cuts_ttbar: &variable_cuts_ttbar
variable_cuts:
- pt_btagJes:
operator: "<="
condition: 2.5e5
# zprime val
add_validation_file: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_0.h5
.variable_cuts_zpext: &variable_cuts_zpext
variable_cuts:
- pt_btagJes:
operator: ">"
condition: 2.5e5
ttbar_test_files:
# Add validation files
validation_files:
ttbar_r21_val:
path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_0.h5
label: "$t\\bar{t}$ Release 21"
<<: *variable_cuts_ttbar
zprime_r21_val:
path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_0.h5
label: "$Z'$ Release 21"
<<: *variable_cuts_zpext
test_files:
ttbar_r21:
Path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_1.h5
data_set_name: "ttbar_r21"
path: <path_palce_holder>/MC16d_hybrid_odd_100_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_ttbar
ttbar_r22:
Path: <path_palce_holder>/MC16d_hybrid-r22_odd_100_PFlow-no_pTcuts-file_1.h5
data_set_name: "ttbar_r22"
path: <path_palce_holder>/MC16d_hybrid-r22_odd_100_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_ttbar
zpext_test_files:
zpext_r21:
Path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext_r21"
path: <path_palce_holder>/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_zpext
zpext_r22:
Path: <path_palce_holder>/MC16d_hybrid-r22-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext_r22"
path: <path_palce_holder>/MC16d_hybrid-r22-ext_odd_0_PFlow-no_pTcuts-file_1.h5
<<: *variable_cuts_zpext
# Path to Variable dict used in preprocessing
var_dict: <path_palce_holder>/umami/umami/configs/Dips_Variables.yaml
......@@ -136,38 +153,6 @@ Eval_parameters_validation:
# Charm fraction value used for evaluation of the trained model
frac_values: {"cjets": 0.018, "ujets": 0.982}
# Cuts which are applied to the different datasets used for evaluation
variable_cuts:
validation_file: