Commit 4775446c authored by Alexander Froch
Browse files

Merge branch 'alfroch-remove-hardcoded-labels' into 'preprocessing-remake'

Remove hard-coded labels for DIPS

See merge request !155
parents 129bb7be 7d2ac373
Pipeline #2931976 passed with stages
in 9 minutes and 16 seconds
......@@ -21,4 +21,3 @@ plots/*
env/*
.vscode/*
!umami/tests/unit/**/*.png
python_install/
# Tag of TensorFlow base image
# https://pypi.org/project/tensorflow/#history
variables:
TFTAG: 2.6.0
TORCHTAG: 1.9.0-cuda11.1-cudnn8-runtime
TFTAG: 2.5.0
stages:
- linting
......@@ -31,20 +30,11 @@ linter:
- if: $CI_COMMIT_BRANCH != ''
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
yaml_linter:
stage: linting
image: sdesbure/yamllint
script:
- 'yamllint -d "{extends: relaxed, rules: {line-length: disable}}" .'
rules:
- if: $CI_COMMIT_BRANCH != ''
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
test_coverage:
stage: coverage_test_stage
image: python:3.7-slim
script:
- pip install --upgrade pip setuptools wheel
- pip install --upgrade pip
- pip install -r requirements.txt
- cd ./coverage_files/
- coverage combine
......@@ -52,14 +42,13 @@ test_coverage:
- coverage xml
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
artifacts:
when: always
paths:
- coverage_files/
reports:
cobertura: coverage_files/coverage.xml
retry: 2
include:
- 'pipelines/.unit_test-gitlab-ci.yaml'
......
......@@ -19,4 +19,4 @@ repos:
language: system
entry: flake8
types: [python]
exclude: setup.py
exclude: setup.py
\ No newline at end of file
......@@ -11,7 +11,7 @@ RUN apt-get update && \
echo "krb5-config krb5-config/add_servers_realm string CERN.CH" | debconf-set-selections && \
echo "krb5-config krb5-config/default_realm string CERN.CH" | debconf-set-selections && \
apt-get install -y krb5-user && \
apt-get install -y vim nano emacs less screen graphviz python3-tk wget
apt-get install -y vim nano emacs less screen graphviz python3-tk wget
COPY requirements.txt .
......
......@@ -26,7 +26,7 @@ ttbar_test_files:
data_set_name: "ttbar_comparison"
zpext_test_files:
zpext_r21:
zpext_r21:
Path: /work/ws/nemo/fr_af1100-Training-Simulations-0/hybrids/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext"
......@@ -45,11 +45,11 @@ bool_use_taus: False
exclude: []
NN_structure:
NN_structure:
lr: 0.01
batch_size: 15000
activations: ["relu", "relu", "relu", "relu", "relu", "relu", "relu", "relu"]
units: [256, 128, 60, 48, 36, 24, 12, 6]
units: [256, 128, 60, 48, 36, 24, 12, 6]
# Eval parameters for validation evaluation while training
Eval_parameters_validation:
......@@ -106,3 +106,4 @@ Eval_parameters_validation:
# Set the datatype of the plots
plot_datatype: "pdf"
......@@ -26,7 +26,7 @@ ttbar_test_files:
data_set_name: "ttbar_comparison"
zpext_test_files:
zpext_r21:
zpext_r21:
Path: /work/ws/nemo/fr_af1100-Training-Simulations-0/hybrids/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext"
......@@ -100,4 +100,4 @@ Eval_parameters_validation:
SecondTag: "\n$\\sqrt{s}=13$ TeV, PFlow jets"
# Set the datatype of the plots
plot_datatype: "pdf"
plot_datatype: "pdf"
\ No newline at end of file
......@@ -42,7 +42,7 @@ preparation:
training_ttbar_cjets:
type: ttbar
category: cjets
# Number of c jets available in MC16d
# Number of c jets available in MC16d
n_jets: 12745953
n_split: 13
cuts:
......@@ -170,7 +170,7 @@ bool_process_taus: False
# set to true if extended flavour labelling scheme is used in preprocessing
bool_extended_labelling: False
# Define undersampling method used. Valid are "count", "weight",
# Define undersampling method used. Valid are "count", "weight",
# "count_bcl_weight_tau", "template_b" and "template_b_count"
# count_bcl_weight_tau is a hybrid of count and weight to deal with taus.
# template_b uses the b as the target distribution, but does not guarantee
......
......@@ -42,7 +42,7 @@ preparation:
training_ttbar_cjets:
type: ttbar
category: cjets
# Number of c jets available in MC16d
# Number of c jets available in MC16d
n_jets: 12745953
n_split: 13
cuts:
......@@ -170,7 +170,7 @@ bool_process_taus: False
# set to true if extended flavour labelling scheme is used in preprocessing
bool_extended_labelling: False
# Define undersampling method used. Valid are "count", "weight",
# Define undersampling method used. Valid are "count", "weight",
# "count_bcl_weight_tau", "template_b" and "template_b_count"
# count_bcl_weight_tau is a hybrid of count and weight to deal with taus.
# template_b uses the b as the target distribution, but does not guarantee
......
import numpy as np
from umami.preprocessing import PDFSampling
# Draw two independent sets of 1000 Gaussian dummy values, each from its own
# freshly created generator: a standard normal for x and a normal with
# mean 1 and standard deviation 2 for y.
x = np.random.default_rng().normal(loc=0.0, scale=1.0, size=1000)
y = np.random.default_rng().normal(loc=1, scale=2, size=1000)

# Histogram the dummy data in 2d (4 bins along x, 5 bins along y) and keep
# the bin edges so the target histogram can reuse exactly the same binning.
h_original, x_bins, y_bins = np.histogram2d(x, y, bins=[4, 5])

# Derive two new quantities from the dummy data via custom functions.
pt = np.cos(np.square(x)) + np.sin(x + y) + np.exp(x)
eta = 20 - np.square(y)

# Histogram the derived quantities on the bin edges of the original
# histogram; only the counts are needed, the returned edges are dropped.
h_target = np.histogram2d(pt, eta, bins=[x_bins, y_bins])[0]

# Compute the PDF ratio from the target and original histograms and
# write the sampler to disk.
ps = PDFSampling()
ps.CalculatePDFRatio(h_target, h_original, x_bins, y_bins)
ps.save("custom-pdf.pkl")
# Path to the training config file
train_config: /work/ws/nemo/fr_af1100-Training-Simulations-0/b-Tagging/Submission_Scripts/Train_Dips_Loose/configs/Dips-PFlow-Training-config.yaml
# Path to the preprocessing config with which the used model was trained
preprocess_config: /work/ws/nemo/fr_af1100-Training-Simulations-0/b-Tagging/Submission_Scripts/Train_Dips_Loose/configs/PFlow-Preprocessing.yaml
# Path to the variable dict used in preprocessing
var_dict: /work/ws/nemo/fr_af1100-Training-Simulations-0/b-Tagging/Submission_Scripts/Train_Dips_Loose/configs/Dips_Variables.yaml
# File used to test the dips integration in the dumper
test_file: /work/ws/nemo/fr_af1100-Training-Simulations-0/DAOD_PHYSVAL.25394913._000001.pool.root.1_loose.h5
# Path to the model file (.h5) to load
model_file: /work/ws/nemo/fr_af1100-Training-Simulations-0/b-Tagging/packages/umami/umami/dips_Loose_lr_0.001_bs_15000_epoch_200_nTrainJets_Full/dips_model_59.h5
\ No newline at end of file
......@@ -17,7 +17,7 @@ ttbar_test_files:
data_set_name: "ttbar_comparison"
zpext_test_files:
zpext_r21:
zpext_r21:
Path: /work/ws/nemo/fr_af1100-Training-Simulations-0/hybrids/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext"
......@@ -42,4 +42,4 @@ Eval_parameters_validation:
fc_values_comp: {
"rnnip": 0.08,
"DL1r": 0.018,
}
}
\ No newline at end of file
......@@ -27,31 +27,31 @@ jets_input_vars:
special_param_jets:
IP2D_cu:
lim_left: -30
lim_right: 30
IP2D_bu:
lim_right: 30
IP2D_bu:
lim_left: -30
lim_right: 30
IP2D_bc:
lim_right: 30
IP2D_bc:
lim_left: -30
lim_right: 30
IP3D_cu:
lim_right: 30
IP3D_cu:
lim_left: -30
lim_right: 30
IP3D_bu:
lim_right: 30
IP3D_bu:
lim_left: -30
lim_right: 30
IP3D_bc:
lim_right: 30
IP3D_bc:
lim_left: -30
lim_right: 30
SV1_NGTinSvx:
lim_left: 0
lim_right: 19
lim_right: 19
JetFitterSecondaryVertex_nTracks:
lim_left: 0
lim_right: 17
lim_right: 17
JetFitter_nTracksAtVtx:
lim_left: 0
lim_right: 19
lim_right: 19
JetFitter_nSingleTracks:
lim_left: 0
lim_right: 18
......@@ -62,50 +62,50 @@ jets_input_vars:
lim_left: 0
lim_right: 200
binning:
IP2D_cu: 100
IP2D_bu: 100
IP2D_bc: 100
IP2D_isDefaults: 2
IP3D_cu: 100
IP3D_bu: 100
IP3D_bc: 100
IP3D_isDefaults: 2
JetFitter_mass: 100
JetFitter_energyFraction: 100
JetFitter_significance3d: 100
JetFitter_deltaR: 100
JetFitter_nVTX: 7
JetFitter_nSingleTracks: 19
JetFitter_nTracksAtVtx: 20
JetFitter_N2Tpair: 201
JetFitter_isDefaults: 2
IP2D_cu : 100
IP2D_bu : 100
IP2D_bc : 100
IP2D_isDefaults : 2
IP3D_cu : 100
IP3D_bu : 100
IP3D_bc : 100
IP3D_isDefaults : 2
JetFitter_mass : 100
JetFitter_energyFraction : 100
JetFitter_significance3d : 100
JetFitter_deltaR : 100
JetFitter_nVTX : 7
JetFitter_nSingleTracks : 19
JetFitter_nTracksAtVtx : 20
JetFitter_N2Tpair : 201
JetFitter_isDefaults : 2
JetFitterSecondaryVertex_minimumTrackRelativeEta: 11
JetFitterSecondaryVertex_averageTrackRelativeEta: 11
JetFitterSecondaryVertex_maximumTrackRelativeEta: 11
JetFitterSecondaryVertex_maximumAllJetTrackRelativeEta: 11
JetFitterSecondaryVertex_minimumAllJetTrackRelativeEta: 11
JetFitterSecondaryVertex_averageAllJetTrackRelativeEta: 11
JetFitterSecondaryVertex_displacement2d: 100
JetFitterSecondaryVertex_displacement3d: 100
JetFitterSecondaryVertex_mass: 100
JetFitterSecondaryVertex_energy: 100
JetFitterSecondaryVertex_energyFraction: 100
JetFitterSecondaryVertex_isDefaults: 2
JetFitterSecondaryVertex_nTracks: 18
pt_btagJes: 100
absEta_btagJes: 100
SV1_Lxy: 100
SV1_N2Tpair: 8
SV1_NGTinSvx: 20
SV1_masssvx: 100
SV1_efracsvx: 100
SV1_significance3d: 100
SV1_deltaR: 10
SV1_L3d: 100
SV1_isDefaults: 2
rnnip_pb: 50
rnnip_pc: 50
rnnip_pu: 50
JetFitterSecondaryVertex_maximumAllJetTrackRelativeEta : 11
JetFitterSecondaryVertex_minimumAllJetTrackRelativeEta : 11
JetFitterSecondaryVertex_averageAllJetTrackRelativeEta : 11
JetFitterSecondaryVertex_displacement2d : 100
JetFitterSecondaryVertex_displacement3d : 100
JetFitterSecondaryVertex_mass : 100
JetFitterSecondaryVertex_energy : 100
JetFitterSecondaryVertex_energyFraction : 100
JetFitterSecondaryVertex_isDefaults : 2
JetFitterSecondaryVertex_nTracks : 18
pt_btagJes : 100
absEta_btagJes : 100
SV1_Lxy : 100
SV1_N2Tpair : 8
SV1_NGTinSvx : 20
SV1_masssvx : 100
SV1_efracsvx : 100
SV1_significance3d : 100
SV1_deltaR : 10
SV1_L3d : 100
SV1_isDefaults : 2
rnnip_pb : 50
rnnip_pc : 50
rnnip_pu : 50
flavours:
b: 5
c: 4
......@@ -178,4 +178,4 @@ tracks_input_vars:
flavours:
b: 5
c: 4
u: 0
u: 0
\ No newline at end of file
......@@ -9,7 +9,7 @@ scores_DL1r: # Each item on this level defines one plot. The name of this key is
type: "scores"
data_set_name: "ttbar" # data set to use. This chooses either the test_file ('ttbar') or the add_test_file ('zpext')
# To include taus, add "ptau" as the last entry
prediction_labels: ["DL1_pb", "DL1_pc", "DL1_pu"] # For umami use umami_pX or dips_pX.
prediction_labels: ["DL1_pb", "DL1_pc", "DL1_pu"] # For umami use umami_pX or dips_pX.
plot_settings: # All options of the score plot can be changed here
UseAtlasTag: True # Enable/Disable AtlasTag
AtlasTag: "Internal Simulation"
......@@ -58,9 +58,9 @@ DL1r_c_flavour:
binomialErrors: true
SecondTag: "\n$\\sqrt{s}=13$ TeV, PFlow jets,\n$t\\bar{t}$ test sample, fc=0.018"
# To do a DL1r_t_flavour ROC plot:
# To do a DL1r_t_flavour ROC plot:
# Same as above with DL1_trej and DL1r_trej (replace "c" flavour by "t").
# Example of a pt vs efficiency plot in a small pT region
eff_vs_pt_small:
type: "ROCvsVar"
......@@ -69,8 +69,8 @@ eff_vs_pt_small:
flat_eff: True # bool whether to plot a flat b-efficiency as a function of var
efficiency: 70 # the targeted efficiency
fc: 0.018
prediction_labels: ["DL1_pb", "DL1_pc", "DL1_pu"] # the prediction label to use
variable: pt # which variable to plot the efficiency as a function of.
prediction_labels: ["DL1_pb", "DL1_pc", "DL1_pu"] # the prediction label to use
variable: pt # which variable to plot the efficiency as a function of.
max_variable: 1500000 #maximum value of the range of variable.
min_variable: 10000 #minimum value of the range of variable.
nbin: 100 # number of bins to use
......@@ -93,8 +93,8 @@ eff_vs_pt_large:
data_set_name: "ttbar"
flat_eff: True #bool whether to plot a flat b-efficiency as a function of var
efficiency: 70 #the targeted efficiency
prediction_labels: ["DL1_pb", "DL1_pc", "DL1_pu"] # the prediction label to use
variable: pt #which variable to plot the efficiency as a function of.
prediction_labels: ["DL1_pb", "DL1_pc", "DL1_pu"] # the prediction label to use
variable: pt #which variable to plot the efficiency as a function of.
max_variable: 5000000 #maximum value of the range of variable.
min_variable: 200000 #minimum value of the range of variable.
nbin: 15 # number of bins to use
......
......@@ -48,7 +48,7 @@ confusion_matrix_Umami_ttbar:
prediction_labels: ["umami_pb", "umami_pc", "umami_pu"] # For umami use umami_pX or dips_pX. The order matters!
# Scanning b-eff, comparing Umami and DL1r, ttbar
beff_scan_tagger_umami:
beff_scan_tagger_compare_umami:
type: "ROC"
models_to_plot:
umami_ttbar_urej:
......@@ -111,7 +111,6 @@ Umami_prob_comparison_pb:
SecondTag: "\n$\\sqrt{s}=13$ TeV, PFlow Jets"
yAxisAtlasTag: 0.9
Ratio_Cut: [0.5, 1.5]
# Scanning b-eff, comparing Umami and DL1r, ttbar
beff_scan_tagger_compare_umami:
type: "ROC_Comparison"
......
......@@ -22,7 +22,7 @@ ttbar_test_files:
data_set_name: "ttbar"
zpext_test_files:
zpext_r21:
zpext_r21:
Path: /nfs/dust/atlas/user/ahnenjan/phd/umami/run/samples/standard_mc16d_ttbar_Zext__2M/hybridLargeFiles/MC16d_hybrid-ext_odd_0_PFlow-no_pTcuts-file_1.h5
data_set_name: "zpext"
......@@ -33,7 +33,7 @@ bool_use_taus: False
exclude: []
NN_structure:
NN_structure:
lr: 0.01
batch_size: 5000
epochs: 200
......
......@@ -28,7 +28,7 @@ nav:
- Running DL1r: DL1r-instructions.md
- Running Dips: Dips-instructions.md
- Running Umami: Umami-instructions.md
- LWTNN Conversion: LWTNN-conversion.md
- LWTNN Conversion: LWTNN-conversion.md
- Evaluate Taggers in Samples: WO_trained_model.md
- Plotting evaluated Results: plotting_umami.md
......
# ----------------------------------------------------------------------------
# Umami base + Umami images: only get built on master and tags
# Umami base + Umami images: only get built on master
# (see below for tags)
# ----------------------------------------------------------------------------
.image_build_template: &image_build_template
......@@ -8,7 +9,6 @@
- ignore
tags:
- docker-image-build
retry: 2
build_umamibase_cpu:
......@@ -37,17 +37,6 @@ build_umamibase_gpu:
variables:
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:$CI_COMMIT_REF_SLUG-gpu'
build_umamibase_gpu_pytorch:
<<: *image_build_template
stage: builds
variables:
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: pytorch/pytorch:$TORCHTAG
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/umamibase:latest-pytorch-gpu'
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Umami images: use base image as a foundation to speed up build process
build_umami_cpu:
<<: *image_build_template
......@@ -98,18 +87,7 @@ build_umamibase_cpu_MR:
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
# possibility to trigger also the GPU image in a MR - but only manually
build_umamibase_gpu_MR:
<<: *image_build_template
stage: image_build_umamibase
variables:
DOCKER_FILE: docker/umamibase/Dockerfile
FROM: tensorflow/tensorflow:$TFTAG-gpu
TO: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/temporary_images:${CI_MERGE_REQUEST_IID}-gpu-base'
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
when: manual
allow_failure: true
# ----------------------------------------------------------------------------
# Publishing:
# copies of the images built in gitlab CI/CD will be deployed to Docker Hub
......
build_docs:
stage: builds
image: gitlab-registry.cern.ch/authoring/documentation/mkdocs:stable
script:
- mkdocs build --strict --clean --site-dir www
- echo -e "AddDefaultCharset UTF-8\nSSLRequireSSL\n" > www/.htaccess
- mkdocs build --strict --clean --site-dir www
- echo -e "AddDefaultCharset UTF-8\nSSLRequireSSL\n" > www/.htaccess
before_script:
- "" # overwrite default, do nothing
- "" # overwrite default, do nothing
artifacts:
paths:
- www
expire_in: 1 hour
paths:
- www
expire_in: 1 hour
only:
- master
......
......@@ -9,7 +9,6 @@
IMAGE_TYPE: "umamibase:latest"
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_PATH=="atlas-flavor-tagging-tools/algorithms/umami"
image: '${CI_REGISTRY}/${CI_PROJECT_NAMESPACE}/umami/$IMAGE_TYPE'
retry: 2
.artifact_template: &artifact_template
name: "$CI_JOB_NAME"
......
......@@ -4,7 +4,7 @@ unittest:
script:
- pip install -r requirements.txt
- apt-get update
- apt-get install -y wget
- apt-get install -y wget
- python setup.py develop
- pytest ./umami/tests/unit/preprocessing -v
- pytest ./umami/tests/unit/evaluation_tools -v
......@@ -33,7 +33,6 @@ unittest:
- coverage_files/
reports:
junit: report.xml
retry: 2
unittest_preprocessing:
<<: *unittest_template
......@@ -57,4 +56,4 @@ unittest_input_vars_tools:
<<: *unittest_template
script:
- pytest --cov=./ --cov-report= ./umami/tests/unit/input_vars_tools/ -v --junitxml=report.xml
- cp .coverage coverage_files/.coverage.unittest_input_vars_tools
- cp .coverage coverage_files/.coverage.unittest_input_vars_tools
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment