Commit feb6934a authored by Franchellucci Stefano's avatar Franchellucci Stefano
Browse files

Modified scripts/check_lwtnn-model.py in order to keep --scale_dict option + relevant documentation

parent af02a9d6
Pipeline #3300529 passed with stages
in 22 minutes and 17 seconds
......@@ -270,11 +270,17 @@ ADDPATH=MyDipsTraining-diff
TAGGER=MyDipsTraining
# Path to the prepared ntuple
HDFFILE=ftag-output.h5
# Path to the config file used for the training
# Then only one of the two following options needs to be given:
# - 1 Path to the config file used for the training
CONFIG=examples/Dips-PFlow-Training-config.yaml
# - 2 Path to the scale dictionary
SCALEDICT=MyDipsTraining_scale_dict.json
# Execute the script
python scripts/check_lwtnn-model.py -i ${HDFFILE} -v ${VARIABLESDICT} -t ${TAGGER} -m ${MODEL} -c ${CONFIG} -o ${ADDPATH}
# or
python scripts/check_lwtnn-model.py -i ${HDFFILE} -v ${VARIABLESDICT} -t ${TAGGER} -m ${MODEL} -s ${SCALEDICT} -o ${ADDPATH}
```
Typically, we are happy when the scores match within 1e-5.
\ No newline at end of file
......@@ -10,6 +10,7 @@ import umami.preprocessing_tools as upt
import umami.train_tools as utt
from umami.configuration import logger
from umami.tf_tools import Sum
from umami.configuration.Configuration import Configuration
def GetParser():
......@@ -23,6 +24,14 @@ def GetParser():
type=str,
help="hdf5 input with taggers included for comparison.",
)
parser.add_argument(
"-s",
"--scale_dict",
type=str,
default=None,
help="""scale_dict file containing scaling and shifting
values.""",
)
parser.add_argument(
"-v",
"--var_dict",
......@@ -55,7 +64,7 @@ def GetParser():
"-c",
"--config",
type=str,
required=True,
default=None,
help="Training Config yaml file.",
)
......@@ -72,6 +81,13 @@ def load_model_umami(model_file, X_test_trk, X_test_jet):
return pred_dips, pred_umami
# workaround to not use the full preprocessing config
class config:
    """Minimal stand-in for a full preprocessing configuration object.

    Only the two attributes assigned in ``__init__`` are provided, so the
    script can be driven by a scale dictionary alone instead of loading a
    complete preprocessing config.

    Attributes:
        dict_file: The value passed as ``preprocess_config`` (in this
            script, the ``--scale_dict`` argument).
        preparation: Dict with a single ``"class_labels"`` entry holding
            the fixed label list ``["ujets", "cjets", "bjets"]``.
    """

    def __init__(self, preprocess_config):
        """Store the scale-dict reference and the hard-coded class labels.

        Args:
            preprocess_config: Stored unchanged as ``dict_file``.
        """
        self.dict_file = preprocess_config
        # The labels are hard-coded because no training config is available
        # to read them from when only a scale dictionary is supplied.
        self.preparation = {"class_labels": ["ujets", "cjets", "bjets"]}
def __run():
args = GetParser()
logger.info(f"Opening input file {args.input}")
......@@ -79,9 +95,21 @@ def __run():
df = pd.DataFrame(file["jets"][:])
df.query("HadronConeExclTruthLabelID <= 5", inplace=True)
training_config = utt.Configuration(args.config)
preprocess_config = upt.Configuration(training_config.preprocess_config)
class_labels = training_config.NN_structure["class_labels"]
if args.config is not None:
if args.scale_dict is not None:
raise ValueError(
"Both --confing and --scale_dict options were given, only one of them needs to be used"
)
training_config = utt.Configuration(args.config)
preprocess_config = upt.Configuration(training_config.preprocess_config)
class_labels = training_config.NN_structure["class_labels"]
elif args.scale_dict is not None:
preprocess_config = config(args.scale_dict)
class_labels = preprocess_config.preparation["class_labels"]
else:
raise ValueError(
"Missing option, either --config or --scale_dict needs to be specified (only one of them)"
)
logger.info(f"Evaluating {args.model}")
......@@ -147,31 +175,31 @@ def __run():
evaluated = "eval_pu"
df["diff"] = abs(df[evaluated] - df[f"{args.tagger}_pu"])
df_select = df.query("diff>1e-6")
df_select = df.query("diff>1e-6").copy()
print(
"Differences off 1e-6", round(len(df_select) / len(df) * 100, 2), "%"
)
df_select = df.query("diff>2e-6")
df_select = df.query("diff>2e-6").copy()
print(
"Differences off 2e-6", round(len(df_select) / len(df) * 100, 2), "%"
)
df_select = df.query("diff>3e-6")
df_select = df.query("diff>3e-6").copy()
print(
"Differences off 3e-6", round(len(df_select) / len(df) * 100, 2), "%"
)
df_select = df.query("diff>4e-6")
df_select = df.query("diff>4e-6").copy()
print(
"Differences off 4e-6", round(len(df_select) / len(df) * 100, 2), "%"
)
df_select = df.query("diff>5e-6")
df_select = df.query("diff>5e-6").copy()
print(
"Differences off 5e-6", round(len(df_select) / len(df) * 100, 2), "%"
)
df_select = df.query("diff>1e-5")
df_select = df.query("diff>1e-5").copy()
print(
"Differences off 1e-5", round(len(df_select) / len(df) * 100, 2), "%"
)
df_select = df.query("diff>1e-6")
df_select = df.query("diff>1e-6").copy()
df_select["diff"] = abs(
df_select[evaluated] - df_select[f"{args.tagger}_pu"]
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment