Commit e3a117d4 authored by Manuel Guth

Merge branch alfroch_add_continue_training with refs/heads/master into refs/merge-requests/500/train
parents 7468466e 1c126d53
Pipeline #3809102 passed with stages in 27 minutes and 12 seconds
......@@ -3,6 +3,8 @@
### Latest
- Adding continue_training option [!500](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/500)
- change default fc for evaluation of Dips and Cads in training configs [!499](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/499)
- Use plotting python API in input var plots (track variables) [!498](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/498)
- Remove redundant loading loop [!496](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/496)
......
......@@ -24,7 +24,8 @@ After all files are preprocessed, we can start with the training. The train conf
|---------|--------|-----------|---------------------|-------------|
| `model_name` | All | `str` | Necessary | Name of the model you want to train. This will be the name of the folder, where all results etc. will be saved in. This folder will automatically be created if not existing. |
| `preprocess_config` | All | `str` | Necessary | Path to your preprocess config you used producing your train datasets. When you start the training and the folder for the model is created, this file is copied to the `metadata/` folder inside the model folder. Also, the path here in the train config will be changed to the new path of the preprocess config inside the `metadata/` folder. |
| `model_file` | All | `str` | Optional | If you already have a model and want to continue the training of this model, you can give the path to this model here. This model will be loaded and used instead of init a new one. |
| `model_file` | All | `str` | Optional | If you already have a model and want to use its weights as a starting point, you can give the path to this model here. This model will be loaded and used instead of initialising a new one. If you don't set `load_optimiser` in `NN_structure`, the optimiser state will be reset. If you just want to continue a specific training, use `continue_training` and leave this option empty. |
| `continue_training` | All | `bool` | Optional | If your training was interrupted, e.g. due to job time constraints, and you just want to continue the training from the latest saved point, set this value to `True`. |
| `train_file` | All | `str` | Necessary | Path to the training sample. This is given by the `preprocessing` step of Umami. If you want to use the tfrecords format to train, this must be the path to the folder where the tfrecords files are saved. |
| `var_dict` | All | `str` | Necessary | Path to the variable dict used in the `preprocess_config` to produce the train sample. |
| `exclude` | DL1r, DL1d | `list` | Necessary | List of variables that are excluded from training. Only compatible with DL1r and DL1d training. To include all, just give an empty list. |
......
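For orientation, here is a minimal train-config sketch showing how the two options are meant to be used. The model name and all paths are placeholders; only the keys listed in the table above are taken from the documentation:

```yaml
# Hypothetical excerpt of a train config -- paths and the model name are placeholders
model_name: dips_continue_example
preprocess_config: <path>/PFlow-Preprocessing.yaml
train_file: <path>/train_sample.h5
var_dict: <path>/Variable_Dict.yaml

# Case 1: warm start from an existing model. The weights are taken from this file;
# unless load_optimiser is set in NN_structure, the optimiser is re-initialised.
# model_file: <path>/some_other_model/model_epoch100.h5

# Case 2: resume an interrupted training from its latest saved model file.
# Leave model_file empty in this case.
continue_training: True
```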
......@@ -6,14 +6,17 @@ import os
import h5py
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint # pylint: disable=import-error
from tensorflow.keras.models import load_model # pylint: disable=import-error
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error
import umami.tf_tools as utf
import umami.train_tools as utt
def Cads_model(train_config, input_shape):
def Cads_model(
train_config: object,
input_shape: tuple,
continue_training: bool = False,
):
"""Keras model definition of CADS.
Parameters
......@@ -22,6 +25,9 @@ def Cads_model(train_config, input_shape):
training config
input_shape : tuple
dataset input shape
continue_training : bool, optional
Decide whether the training is continued using the latest
model file, by default False
Returns
-------
......@@ -29,31 +35,19 @@ def Cads_model(train_config, input_shape):
CADS keras model
int
Number of epochs
int
Starting epoch number
"""
# Load NN Structure and training parameter from file
NN_structure = train_config.NN_structure
load_optimiser = (
NN_structure["load_optimiser"] if "load_optimiser" in NN_structure else True
)
if train_config.model_file is not None:
# Load CADS model from file
logger.info(f"Loading model from: {train_config.model_file}")
cads = load_model(
train_config.model_file,
{
"Sum": utf.Sum,
"Attention": utf.Attention,
"DeepSet": utf.DeepSet,
"AttentionPooling": utf.AttentionPooling,
"DenseNet": utf.DenseNet,
"ConditionalAttention": utf.ConditionalAttention,
"ConditionalDeepSet": utf.ConditionalDeepSet,
},
compile=load_optimiser,
)
# Check if a prepared model is used or not
cads, init_epoch, load_optimiser = utf.prepare_model(
train_config=train_config,
continue_training=continue_training,
)
else:
if cads is None:
# Init a new cads/dips attention model
cads = utf.Deepsets_model(
repeat_input_shape=input_shape,
......@@ -74,7 +68,7 @@ def Cads_model(train_config, input_shape):
attention_softmax=False,
)
if not load_optimiser or train_config.model_file is None:
if load_optimiser is False:
# Set optimiser and loss
model_optimiser = Adam(learning_rate=NN_structure["lr"])
cads.compile(
......@@ -87,7 +81,7 @@ def Cads_model(train_config, input_shape):
if logger.level <= 20:
cads.summary()
return cads, NN_structure["epochs"]
return cads, NN_structure["epochs"], init_epoch
def Cads(args, train_config, preprocess_config):
......@@ -204,9 +198,13 @@ def Cads(args, train_config, preprocess_config):
)
# Init CADS model
cads, epochs = Cads_model(
cads, epochs, init_epoch = Cads_model(
train_config=train_config,
input_shape=(metadata["n_trks"], metadata["n_trk_features"]),
continue_training=train_config.config["continue_training"]
if "continue_training" in train_config.config
and train_config.config["continue_training"] is not None
else False,
)
# Check if epochs is set via argparser or not
......@@ -307,6 +305,7 @@ def Cads(args, train_config, preprocess_config):
else metadata["n_jets"] / NN_structure["batch_size"],
use_multiprocessing=True,
workers=8,
initial_epoch=init_epoch,
)
# Dump dict into json
......
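Note that the `initial_epoch` argument added to `model.fit` above does not extend the training: Keras runs from `initial_epoch` up to `epochs`, so a resumed job simply continues the epoch numbering where the previous one stopped. A self-contained toy illustration (plain Keras, not Umami code):

```python
import numpy as np
import tensorflow as tf

# Toy model and random data, purely illustrative
model = tf.keras.Sequential([tf.keras.layers.Dense(2, activation="softmax")])
model.compile(optimizer="adam", loss="categorical_crossentropy")

x = np.random.rand(32, 4)
y = tf.keras.utils.to_categorical(np.random.randint(2, size=32), 2)

# Suppose a previous 5-epoch job died after epoch 3: with initial_epoch=3
# Keras trains only epochs 4 and 5 and logs them with the correct numbering.
model.fit(x, y, epochs=5, initial_epoch=3, batch_size=8)
```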
......@@ -13,7 +13,7 @@ from tensorflow.keras.layers import ( # pylint: disable=import-error
Dropout,
Input,
)
from tensorflow.keras.models import Model, load_model # pylint: disable=import-error
from tensorflow.keras.models import Model # pylint: disable=import-error
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error
import umami.tf_tools as utf
......@@ -27,6 +27,7 @@ def DL1_model(
train_config: object,
input_shape: tuple,
feature_connect_indices: list = None,
continue_training: bool = False,
):
"""
Constructs or loads the DL1 model
......@@ -40,11 +41,18 @@ def DL1_model(
Size of the input: (nFeatures,).
feature_connect_indices : list
List with features that are fed in a second time.
continue_training : bool, optional
Decide whether the training is continued using the latest
model file, by default False
Returns
-------
model: keras tensorflow model.
NN_structure["epochs"]: number of epochs to be trained
model : keras model
Keras model.
NN_structure["epochs"] :
number of epochs to be trained
init_epoch : int
Starting epoch number
"""
# Load NN Structure and training parameter from file
......@@ -54,16 +62,14 @@ def DL1_model(
batch_norm = NN_structure["Batch_Normalisation"]
dropout = NN_structure["dropout"]
class_labels = NN_structure["class_labels"]
load_optimiser = (
NN_structure["load_optimiser"] if "load_optimiser" in NN_structure else True
)
# Load model from file if defined
if train_config.model_file is not None:
logger.info(f"Loading model from: {train_config.model_file}")
model = load_model(train_config.model_file, compile=load_optimiser)
# Check if a prepared model is used or not
model, init_epoch, load_optimiser = utf.prepare_model(
train_config=train_config,
continue_training=continue_training,
)
else:
if model is None:
# Define input
inputs = Input(shape=input_shape)
......@@ -99,6 +105,7 @@ def DL1_model(
)(x)
model = Model(inputs=inputs, outputs=predictions)
if load_optimiser is False:
# Compile model with given optimiser
model_optimiser = Adam(learning_rate=NN_structure["lr"])
model.compile(
......@@ -111,7 +118,7 @@ def DL1_model(
if logger.level <= 20:
model.summary()
return model, NN_structure["epochs"]
return model, NN_structure["epochs"], init_epoch
def TrainLargeFile(args, train_config, preprocess_config):
......@@ -231,10 +238,14 @@ def TrainLargeFile(args, train_config, preprocess_config):
)
# Load model and epochs
model, epochs = DL1_model(
model, epochs, init_epoch = DL1_model(
train_config=train_config,
input_shape=(metadata["n_jet_features"],),
feature_connect_indices=feature_connect_indices,
continue_training=train_config.config["continue_training"]
if "continue_training" in train_config.config
and train_config.config["continue_training"] is not None
else False,
)
# Check if epochs is set via argparser or not
......@@ -305,6 +316,7 @@ def TrainLargeFile(args, train_config, preprocess_config):
else metadata["n_jets"] / NN_structure["batch_size"],
use_multiprocessing=True,
workers=8,
initial_epoch=init_epoch,
)
# Dump dict into json
......
......@@ -16,29 +16,38 @@ from tensorflow.keras.layers import ( # pylint: disable=import-error
Masking,
TimeDistributed,
)
from tensorflow.keras.models import Model, load_model # pylint: disable=import-error
from tensorflow.keras.models import Model # pylint: disable=import-error
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error
import umami.tf_tools as utf
import umami.train_tools as utt
def Dips_model(train_config=None, input_shape=None):
def Dips_model(
train_config: object,
input_shape: tuple,
continue_training: bool = False,
):
"""Keras model definition of DIPS.
Parameters
----------
train_config : object, optional
training config, by default None
input_shape : tuple, optional
dataset input shape, by default None
train_config : object
training config
input_shape : tuple
dataset input shape
continue_training : bool, optional
Decide whether the training is continued using the latest
model file, by default False
Returns
-------
keras model
Dips keras model
int
number of epochs
Number of epochs
int
Starting epoch number
"""
# Load NN Structure and training parameter from file
NN_structure = train_config.NN_structure
......@@ -47,19 +56,15 @@ def Dips_model(train_config=None, input_shape=None):
batch_norm = NN_structure["Batch_Normalisation"]
dropout = NN_structure["dropout"]
class_labels = NN_structure["class_labels"]
load_optimiser = (
NN_structure["load_optimiser"] if "load_optimiser" in NN_structure else True
)
if train_config.model_file is not None:
# Load DIPS model from file
logger.info(f"Loading model from: {train_config.model_file}")
dips = load_model(
train_config.model_file, {"Sum": utf.Sum}, compile=load_optimiser
)
# Check if a prepared model is used or not
dips, init_epoch, load_optimiser = utf.prepare_model(
train_config=train_config,
continue_training=continue_training,
)
else:
logger.info("No modelfile provided! Initialize a new one!")
if dips is None:
logger.info("No modelfile provided! Initialising a new one!")
# Set the track input
trk_inputs = Input(shape=input_shape)
......@@ -111,7 +116,7 @@ def Dips_model(train_config=None, input_shape=None):
output = Dense(len(class_labels), activation="softmax", name="Jet_class")(F)
dips = Model(inputs=trk_inputs, outputs=output)
if not load_optimiser or train_config.model_file is None:
if load_optimiser is False:
# Set optimiser and loss
model_optimiser = Adam(learning_rate=NN_structure["lr"])
dips.compile(
......@@ -124,7 +129,7 @@ def Dips_model(train_config=None, input_shape=None):
if logger.level <= 20:
dips.summary()
return dips, NN_structure["epochs"]
return dips, NN_structure["epochs"], init_epoch
def Dips(args, train_config, preprocess_config):
......@@ -220,9 +225,13 @@ def Dips(args, train_config, preprocess_config):
)
# Init dips model
dips, epochs = Dips_model(
dips, epochs, init_epoch = Dips_model(
train_config=train_config,
input_shape=(metadata["n_trks"], metadata["n_trk_features"]),
continue_training=train_config.config["continue_training"]
if "continue_training" in train_config.config
and train_config.config["continue_training"] is not None
else False,
)
# Check if epochs is set via argparser or not
......@@ -294,6 +303,7 @@ def Dips(args, train_config, preprocess_config):
else metadata["n_jets"] / NN_structure["batch_size"],
use_multiprocessing=True,
workers=8,
initial_epoch=init_epoch,
)
# Dump dict into json
......
......@@ -18,7 +18,7 @@ from tensorflow.keras.layers import ( # pylint: disable=import-error
Masking,
TimeDistributed,
)
from tensorflow.keras.models import Model, load_model # pylint: disable=import-error
from tensorflow.keras.models import Model # pylint: disable=import-error
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error
import umami.tf_tools as utf
......@@ -26,24 +26,34 @@ import umami.train_tools as utt
from umami.preprocessing_tools import GetVariableDict
def Umami_model(train_config=None, input_shape=None, njet_features=None):
def Umami_model(
train_config: object,
input_shape: tuple,
njet_features: int,
continue_training: bool = False,
):
"""Keras model definition of UMAMI tagger.
Parameters
----------
train_config : object, optional
training config, by default None
input_shape : tuple, optional
dataset input shape, by default None
njet_features: int, optional
number of jet features, by default None
train_config : object
training config
input_shape : tuple
dataset input shape
njet_features: int
number of jet features
continue_training : bool, optional
Decide whether the training is continued using the latest
model file, by default False
Returns
-------
keras model
UMAMI keras model
int
number of epochs
Number of epochs
int
Starting epoch number
"""
# Load NN Structure and training parameter from file
NN_structure = train_config.NN_structure
......@@ -52,19 +62,15 @@ def Umami_model(train_config=None, input_shape=None, njet_features=None):
batch_norm = NN_structure["Batch_Normalisation"]
dropout = NN_structure["dropout"]
class_labels = NN_structure["class_labels"]
load_optimiser = (
NN_structure["load_optimiser"] if "load_optimiser" in NN_structure else True
)
if train_config.model_file is not None:
# Load DIPS model from file
logger.info(f"Loading model from: {train_config.model_file}")
umami = load_model(
train_config.model_file, {"Sum": utf.Sum}, compile=load_optimiser
)
# Check if a prepared model is used or not
umami, init_epoch, load_optimiser = utf.prepare_model(
train_config=train_config,
continue_training=continue_training,
)
else:
logger.info("No modelfile provided! Initialise a new one!")
if umami is None:
logger.info("No modelfile provided! Initialising a new one!")
# Set the track input
trk_inputs = Input(shape=input_shape)
......@@ -151,6 +157,7 @@ def Umami_model(train_config=None, input_shape=None, njet_features=None):
inputs=[trk_inputs, jet_inputs], outputs=[dips_output, jet_output]
)
if load_optimiser is False:
# Set optimiser and loss
model_optimiser = Adam(learning_rate=NN_structure["lr"])
umami.compile(
......@@ -164,7 +171,7 @@ def Umami_model(train_config=None, input_shape=None, njet_features=None):
if logger.level <= 20:
umami.summary()
return umami, NN_structure["epochs"]
return umami, NN_structure["epochs"], init_epoch
def Umami(args, train_config, preprocess_config):
......@@ -311,7 +318,7 @@ def Umami(args, train_config, preprocess_config):
" directory with TF Record Files. You should check this."
)
umami, _ = Umami_model(
umami, _, init_epoch = Umami_model(
train_config=train_config,
input_shape=(metadata["n_trks"], metadata["n_trk_features"]),
njet_features=metadata["n_jet_features"],
......@@ -382,6 +389,7 @@ def Umami(args, train_config, preprocess_config):
else metadata["n_jets"] / NN_structure["batch_size"],
use_multiprocessing=True,
workers=8,
initial_epoch=init_epoch,
)
# Dump dict into json
......
......@@ -8,27 +8,32 @@ import h5py
import tensorflow as tf
import yaml
from tensorflow.keras.callbacks import ModelCheckpoint # pylint: disable=import-error
from tensorflow.keras.models import load_model # pylint: disable=import-error
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error
import umami.tf_tools as utf
import umami.train_tools as utt
# from umami.institutes.utils import is_qsub_available, submit_zeuthen
from umami.tools import yaml_loader
def Umami_model(train_config=None, input_shape=None, njet_features=None):
def Umami_model(
train_config: object,
input_shape: tuple,
njet_features: int,
continue_training: bool = False,
):
"""Keras model definition of UMAMI tagger.
Parameters
----------
train_config : object, optional
training config, by default None
input_shape : tuple, optional
dataset input shape, by default None
njet_features: int, optional
number of jet features, by default None
train_config : object
training config
input_shape : tuple
dataset input shape
njet_features: int
number of jet features
continue_training : bool, optional
Decide whether the training is continued using the latest
model file, by default False
Returns
-------
......@@ -36,26 +41,19 @@ def Umami_model(train_config=None, input_shape=None, njet_features=None):
UMAMI with conditional attention keras model
int
number of epochs
int
Starting epoch number
"""
# Load NN Structure and training parameter from file
NN_structure = train_config.NN_structure
if train_config.model_file is not None:
# Load DIPS model from file
logger.info(f"Loading model from: {train_config.model_file}")
custom_obj = {
"Sum": utf.Sum,
"Attention": utf.Attention,
"DeepSet": utf.DeepSet,
"AttentionPooling": utf.AttentionPooling,
"DenseNet": utf.DenseNet,
"ConditionalAttention": utf.ConditionalAttention,
"ConditionalDeepset": utf.ConditionalDeepSet,
}
umami = load_model(train_config.model_file, custom_obj, compile=False)
# Check if a prepared model is used or not
umami, init_epoch, load_optimiser = utf.prepare_model(
train_config=train_config,
continue_training=continue_training,
)
else:
if umami is None:
logger.info("No modelfile provided! Initialize a new one!")
umami = utf.Deepsets_model_umami(
......@@ -80,20 +78,21 @@ def Umami_model(train_config=None, input_shape=None, njet_features=None):
attention_softmax=False,
)
if load_optimiser is False:
# Set optimiser and loss
model_optimizer = Adam(learning_rate=NN_structure["lr"])
umami.compile(
loss="categorical_crossentropy",
loss_weights={"dips": NN_structure["dips_loss_weight"], "umami": 1},
optimizer=model_optimizer,
metrics=["accuracy"],
)
# Print Umami model summary when the log level is lower than or equal to INFO
if logger.level <= 20:
umami.summary()
# Set optimier and loss
model_optimizer = Adam(learning_rate=NN_structure["lr"])
umami.compile(
loss="categorical_crossentropy",
loss_weights={"dips": NN_structure["dips_loss_weight"], "umami": 1},
optimizer=model_optimizer,
metrics=["accuracy"],
)
return umami, NN_structure["epochs"]
return umami, NN_structure["epochs"], init_epoch
def UmamiCondAtt(args, train_config, preprocess_config):
......@@ -232,10 +231,14 @@ def UmamiCondAtt(args, train_config, preprocess_config):
"a directory with TF Record Files. You should check this."
)
umami, _ = Umami_model(
umami, _, init_epoch = Umami_model(
train_config=train_config,
input_shape=(metadata["n_trks"], metadata["n_trk_features"]),
njet_features=metadata["n_jet_features"],
continue_training=train_config.config["continue_training"]
if "continue_training" in train_config.config
and train_config.config["continue_training"] is not None
else False,
)
# Check if epochs is set via argparser or not
......@@ -306,6 +309,7 @@ def UmamiCondAtt(args, train_config, preprocess_config):
else metadata["n_jets"] / NN_structure["batch_size"],
use_multiprocessing=True,
workers=8,
initial_epoch=init_epoch,
)
# Dump dict into json
......
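Moving the `compile` call under `if load_optimiser is False` (and dropping the old unconditional compile shown above) matters because re-compiling with a fresh `Adam` instance discards the optimiser's internal state (step counter and moment estimates), which a resumed training should keep. A toy illustration of that effect in plain Keras (not Umami code):

```python
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss="mse")
model.fit(np.random.rand(16, 3), np.random.rand(16, 1), epochs=1, verbose=0)

# The optimiser has accumulated state after training
print(model.optimizer.iterations.numpy())  # > 0

# Re-compiling with a new Adam instance resets that state,
# which is why a continued training skips this step.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss="mse")
print(model.optimizer.iterations.numpy())  # 0 again
```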
......@@ -20,5 +20,5 @@ from umami.tf_tools.layers import (
Sum,
)
from umami.tf_tools.load_tfrecord import TFRecordReader, load_tfrecords_train_dataset
from umami.tf_tools.models import Deepsets_model, Deepsets_model_umami
from umami.tf_tools.models import Deepsets_model, Deepsets_model_umami, prepare_model
from umami.tf_tools.tools import GetLRReducer
......@@ -2,6 +2,8 @@
Implementations by Johnny Raine
"""
import os
import tensorflow.keras.backend as K # pylint: disable=import-error
from tensorflow.keras.layers import ( # pylint: disable=import-error
Activation,
......@@ -12,7 +14,10 @@ from tensorflow.keras.layers import ( # pylint: disable=import-error
Input,
Lambda,
)
from tensorflow.keras.models import Model # pylint: disable=import-error
from tensorflow.keras.models import Model, load_model # pylint: disable=import-error
from umami.configuration import logger
from umami.tools import natural_keys
from .layers import (
Attention,
......@@ -22,9 +27,117 @@ from .layers import (
DeepSet,
DenseNet,
MaskedAverage1DPooling,
Sum,
)
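The body of the new `utf.prepare_model` helper is truncated in this diff, but its interface is fixed by the call sites above: it takes the train config and the `continue_training` flag and returns `(model, init_epoch, load_optimiser)`, with `model = None` when a fresh network should be built. The following is only an illustrative reconstruction consistent with those call sites; the directory layout, file naming and epoch parsing are assumptions, not the actual implementation:

```python
import os
import re

from tensorflow.keras.models import load_model  # pylint: disable=import-error


def prepare_model(train_config: object, continue_training: bool = False):
    """Sketch: return (model, init_epoch, load_optimiser) for the three cases."""
    NN_structure = train_config.NN_structure
    load_optimiser = NN_structure.get("load_optimiser", True)

    if continue_training:
        # Assumption: saved models live in <model_name>/model_files/ and the
        # epoch number is encoded in the file name (natural_keys would sort them).
        model_dir = os.path.join(train_config.model_name, "model_files")
        latest = sorted(os.listdir(model_dir))[-1]
        # compile=True keeps the optimiser state so the training really continues
        model = load_model(os.path.join(model_dir, latest), compile=True)
        init_epoch = int(re.findall(r"\d+", latest)[-1])
        return model, init_epoch, True

    if train_config.model_file is not None:
        # Warm start from a user-supplied model; the optimiser state is only kept
        # if load_optimiser is set in NN_structure (custom layers such as utf.Sum
        # would have to be passed as custom_objects here).
        model = load_model(train_config.model_file, compile=load_optimiser)
        return model, 0, load_optimiser

    # No model given: let the caller build and compile a new one
    return None, 0, False
```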