Commit cc4d1305 authored by Maxime Fernoux's avatar Maxime Fernoux
Browse files

Update r22mf with master

parents c32afadb 4d9c015b
Pipeline #3793317 passed with stages
in 10 minutes and 53 seconds
......@@ -64,24 +64,34 @@ test_pflow:
- ci-pflow
expire_in: 1 hour
test_trackjets:
test_slim:
stage: run
needs: [compile_analysisbase]
script:
- test-dumper -d $PWD/ci-trackjets trackjets
- test-dumper -d $PWD/ci-pflow-slim slim
artifacts:
paths:
- ci-trackjets
- ci-pflow-slim
expire_in: 1 hour
test_all:
test_truth:
stage: run
needs: [compile_analysisbase]
script:
- test-dumper -d $PWD/ci-pflow-all all
- test-dumper -d $PWD/ci-pflow-truth truth
artifacts:
paths:
- ci-pflow-all
- ci-pflow-truth
expire_in: 1 hour
test_trackjets:
stage: run
needs: [compile_analysisbase]
script:
- test-dumper -d $PWD/ci-trackjets trackjets
artifacts:
paths:
- ci-trackjets
expire_in: 1 hour
test_trackless:
......@@ -142,8 +152,10 @@ test_singlebtag_submit:
stage: run
needs: [compile_analysisbase]
script:
- export USER=test
- submit-single-btag -d -f
- export USER=ci-test
- source setup-athena.sh
- lsetup panda
- grid-submit -d -f single-btag
test_configs:
stage: run
......@@ -191,9 +203,9 @@ build_img_tag:
###################################
docs_pflow:
stage: docs
needs: [test_all]
needs: [test_truth]
script:
- docs/scripts/ci-dump-variable-docs pflow ci-pflow-all/output.h5
- docs/scripts/ci-dump-variable-docs pflow ci-pflow-truth/output.h5
artifacts:
paths:
- ci-docs-pflow
......
......@@ -36,6 +36,7 @@ set(LINK_LIBRARIES
JetMomentToolsLib
InDetTrackSystematicsToolsLib
xAODHIEvent
MCTruthClassifierLib
)
# anything that is built conditionally goes here
if (NOT XAOD_STANDALONE)
......@@ -64,7 +65,6 @@ atlas_add_library(dataset-dumper
src/TrackTruthDecorator.cxx
src/TrackVertexDecorator.cxx
src/TrackLeptonDecorator.cxx
src/TrackAmbiDecorator.cxx
src/ConfigFileTools.cxx
src/BTagInputChecker.cxx
src/TruthCorruptionCounter.cxx
......@@ -77,6 +77,7 @@ atlas_add_library(dataset-dumper
src/SingleBTagConfig.cxx
src/SingleBTagTools.cxx
src/JetLeptonDecayLabelDecorator.cxx
src/LeptonTruthDecorator.cxx
PUBLIC_HEADERS src
LINK_LIBRARIES ${LINK_LIBRARIES}
)
......@@ -149,6 +150,6 @@ atlas_install_scripts(
grid/submit-HI
grid/submit-hbb
grid/submit-retag
grid/submit-upgrade
grid/grid-submit
)
#!/usr/bin/env bash
# Grid submission driver. This script should not be sourced, we don't
# need anything in here to propagate to the surrounding environment.
#
# $- contains 'i' only in an interactive shell, which is a sign that
# someone ran `source grid-submit` rather than executing it.
if [[ $- == *i* ]] ; then
echo "Don't source me!" >&2
return 1
else
# set the shell to exit if there's an error (-e), and to error if
# there's an unset variable (-u)
set -eu
fi
# separator line used to delimit sections of terminal output
BREAK="----------------------------------------------"
###################################################
# Add some mode switches
###################################################
# Dumper executable to run for each submission mode.
declare -A SCRIPTS_BY_MODE=(
[single-btag]=dump-single-btag
[retag]=ca-dump-retag
[trigger]=ca-dump-trigger-btag
[trigger-wp]=ca-dump-trigger-workingpoints
[upgrade]=ca-dump-upgrade
[upgrade-HI]=ca-dump-upgrade-HI
)
# Default json configuration for each mode; resolved against the
# config directory further down.
declare -A CONFIGS_BY_MODE=(
[single-btag]=EMPFlow.json
[retag]=EMPFlow.json
[trigger]=trigger.json
[trigger-wp]=trigger_wp.json
[upgrade]=upgrade.json
[upgrade-HI]=upgrade-HI.json
)
# Default input dataset list for each mode; resolved against the
# inputs directory further down.
declare -A INPUTS_BY_MODE=(
[single-btag]=single-btag.txt
[retag]=single-btag.txt
[trigger]=trigger.txt
[trigger-wp]=trigger-workingpoints.txt
[upgrade]=upgrade.txt
[upgrade-HI]=upgrade-HI.txt
)
###################################################
# CLI
###################################################
_usage() {
# Print a one-line usage summary (script name with its path stripped).
local prog=${0##*/}
printf 'usage: %s [-h] [options] MODE\n' "${prog}"
}
_help() {
# Print the usage line, a description, all options, and the table of
# available run modes (built from the mode arrays above).
_usage
cat <<EOF
Submit dumper jobs to the grid! This script will ask you to commit your changes.
Specify a running MODE (below) and then optionally overwrite the mode's defaults
using the optional flags. You are encouraged to use the -t argument to tag your
submission.
Options:
-s script : Executable to be run (e.g. dump-single-btag)
-c config : Path to the config file to use for the jobs
-i file : File listing input samples, will override the default list
-t tag : Tag the current code state using the supplied string
-f : Force, don't require changes to be committed
-a : Run a test job using only one file per dataset
-d : Dry run, don't submit anything or make a tarball, but build the
submit directory
Modes:
$(for key in "${!CONFIGS_BY_MODE[@]}"; do
echo -e " $key \t=> ${SCRIPTS_BY_MODE[$key]} -c ${CONFIGS_BY_MODE[$key]}";
done)
EOF
}
# defaults (empty string means "use the mode's default", filled in below)
SCRIPT=""; CONFIG=""; INPUT_DATASETS="";
TAG=""; FORCE=""; DRYRUN=""; EXTRA_ARGS=""; ANNOTATE="";
# leading ':' in the optstring selects silent error reporting, so the
# \? and : cases below print our own messages instead of getopts' own
while getopts ":hs:c:i:t:fad" opt $@;
do
case $opt in
h) _help; exit 1;;
s) SCRIPT=${OPTARG};;
c) CONFIG=${OPTARG};;
i) INPUT_DATASETS=${OPTARG};;
t) TAG=${OPTARG};;
f) FORCE=1 ;;
# test job: one file per dataset, and mark the output name as a test
a) EXTRA_ARGS+=' --nFiles 1 '; ANNOTATE+=.test ;;
# dry run: prefix the submission commands with an echo
d) DRYRUN="echo DRY-RUNNING: " ;;
# handle errors
\?) _usage; echo "Unknown option: -$OPTARG" >&2; exit 1;;
:) _usage; echo "Missing argument for -$OPTARG" >&2; exit 1;;
*) _usage; echo "Unimplemented option: -$OPTARG" >&2; exit 1;;
esac
done
shift $((OPTIND-1))
# check required args: exactly one positional MODE
if [[ "$#" -ne 1 ]]; then
echo "Please specify a running mode after any optional arguments"
_usage
exit 1
fi
MODE=$1
# ${var+foo} expands only when the key is set, so this rejects any
# MODE that is not a key of the mode tables
if [[ -z ${SCRIPTS_BY_MODE[$MODE]+foo} ]]; then
echo "Invalid mode! Run ${0##*/} -h for allowed modes"
exit 1
fi
# this is where all the source files are: the repository root, as a
# path relative to the current working directory
BASE=$(realpath --relative-to=$PWD $(dirname $(readlink -e ${BASH_SOURCE[0]}))/../..)
CONFIG_DIR=${BASE}/configs/single-b-tag
INPUTS_DIR=${BASE}/BTagTrainingPreprocessing/grid/inputs
# if arguments are not specified, fall back to the mode's defaults
if [[ -z "$SCRIPT" ]]; then SCRIPT=${SCRIPTS_BY_MODE[$MODE]}; fi
if [[ -z "$CONFIG" ]]; then CONFIG=$CONFIG_DIR/${CONFIGS_BY_MODE[$MODE]}; fi
if [[ -z "$INPUT_DATASETS" ]]; then INPUT_DATASETS=$INPUTS_DIR/${INPUTS_BY_MODE[$MODE]}; fi
if [[ "$DRYRUN" ]]; then echo -e $BREAK '\nDRY RUNNING'; fi
# let the user know what options we are using
echo $BREAK
echo -e "Script\t: $SCRIPT"
echo -e "Config\t: $CONFIG"
echo -e "Inputs\t: $INPUT_DATASETS"
echo $BREAK
###################################################
# Check for early exit
###################################################
# Check arguments. Note: the previous form `[[ ... ]] && (echo; exit 1)`
# ran `exit` in a subshell, so it only terminated the script as a
# side-effect of `set -e`; spell the checks out explicitly and send the
# error messages to stderr.
if [[ ! -f $CONFIG ]]; then
echo "Config file doesn't exist!" >&2
exit 1
fi
if [[ ! -f $INPUT_DATASETS ]]; then
echo "Inputs file doesn't exist!" >&2
exit 1
fi
# check for panda setup (prun must be on the path)
if ! type prun &> /dev/null ; then
echo "ERROR: You need to source the grid setup script before continuing!" >&2
exit 1
fi
# check to make sure you've properly set up the environment: if you
# haven't sourced the setup script in the build directory the grid
# submission will fail, so we check here before doing any work.
if ! type $SCRIPT &> /dev/null ; then
echo "ERROR: Code setup with the wrong release or you haven't sourced build/x*/setup.sh" >&2
exit 1
fi
# check for uncontrolled changes (unless -f was given)
WD=$PWD
if [[ ! $FORCE ]]; then
cd ${BASE}
if ! git diff-index --quiet HEAD; then
echo "ERROR: uncommitted changes, please commit them" >&2; exit 1
fi
cd ${WD}
fi
###################################################
# Some variable definitions
###################################################
# user's grid name: prefer RUCIO_ACCOUNT, fall back to USER
GRID_NAME=${RUCIO_ACCOUNT-${USER}}
######################################################
# Create a tag
######################################################
# JOB_ID uniquely names this submission; it gets the git tag appended
# below when -t is used
JOB_ID=$(date +%F-T%H%M%S)
PUSH_TAG=0
if [[ ${TAG} ]]; then
PUSH_TAG=1
cd ${BASE}
# refuse to tag when "origin" doesn't look like the user's fork
ORIGIN_URL=$(git remote get-url origin)
if [[ $ORIGIN_URL != *"$GRID_NAME"* ]]; then
echo "ERROR: Remote URL for \"origin\" is not a fork, please submit from a fork" >&2
exit 1
fi
# reuse an existing tag on HEAD if there is one, otherwise create one
GIT_TAG=$(git tag --points-at)
if [[ -z ${GIT_TAG} ]]; then
GIT_TAG=$(date +%y-%m-%d)_${TAG}
echo "No tag found, creating tag ${GIT_TAG}"
${DRYRUN} git tag ${GIT_TAG} -m "automated tdd submission tag $(date +%F-T%H:%M:%S)"
else
# existing tag: nothing new to push later
PUSH_TAG=0
echo "Found and reusing tag ${GIT_TAG}"
fi
JOB_ID=${JOB_ID}.${GIT_TAG}
cd ${WD}
else
echo "You didn't tag the code :(, consider using -t next time"
fi
######################################################
# Prep the submit area
######################################################
# this is the subdirectory we submit from
SUBMIT_DIR=submit
echo "Preparing submit directory"
if [[ -d ${SUBMIT_DIR} ]]; then
echo "Removing old submit directory"
rm -rf ${SUBMIT_DIR}
fi
mkdir ${SUBMIT_DIR}
# write the expanded (merged) config file to the submit dir, keeping
# the original file name
test-config-merge $CONFIG > ${SUBMIT_DIR%/}/${CONFIG##*/}
cd ${SUBMIT_DIR}
# build a zip of the files we're going to submit
ZIP=job.tgz
echo "Making tarball of local files: ${ZIP}" >&2
# the --outTarBall, --noSubmit, and --useAthenaPackages arguments are
# important. The --outDS and --exec don't matter at all here, they are
# just placeholders to keep panda from complaining.
${DRYRUN} prun --outTarBall=${ZIP} --noSubmit --useAthenaPackages\
--exec "ls"\
--outDS user.${GRID_NAME}.x
######################################################
# Loop over datasets and submit
######################################################
# parse inputs file, dropping comment lines starting with '#'
INPUT_DATASETS=$(grep -v '^#' ${WD}/$INPUT_DATASETS)
# word-split the remaining dataset names into an array
INPUT_DATASETS=($INPUT_DATASETS)
# loop over all inputs
echo $BREAK
# expands to the "echo DRY-RUNNING: " banner on a dry run, no-op otherwise
$DRYRUN
echo "Submitting ${#INPUT_DATASETS[*]} datasets as ${GRID_NAME}..."
echo $BREAK
# define a function to do all the submitting
# Submit one dataset ($1) with prun, logging to <output-ds>.log.
# NB: the ( ) function body runs in a subshell, so set -eu and any
# variable changes here cannot leak back into the caller.
function submit-job() (
set -eu
DS=$1
# this regex extracts the DSID from the input dataset name, so
# that we can give the output dataset a unique name. It's not
# pretty: ideally we'd just suffix our input dataset name with
# another tag. But thanks to insanely long job options names we
# use in the generation stage we're running out of space for
# everything else.
DSID=$(sed -r 's/[^\.]*\.([0-9]{6,8})\..*/\1/' <<< ${DS})
# build the full output dataset name
CONFIG_FILE=${CONFIG##*/}
# this terrible regex extracts the atlas tags, e.g. e4342_s3443...
ATLAS_TAGS=$(egrep -o '(_?[esdfarp][0-9]{3,6}){3,}' <<< ${DS})
TAGS=${ATLAS_TAGS}.tdd.${CONFIG_FILE%.*}.${AtlasBuildStamp}
OUT_DS=user.${GRID_NAME}.${DSID}.${TAGS}.${JOB_ID}${ANNOTATE}
# check to make sure the grid name isn't too long
if [[ $(wc -c <<< ${OUT_DS}) -ge 120 ]] ; then
echo "ERROR: dataset name ${OUT_DS} is too long, can't submit!" 1>&2
return 1
fi
# now submit; all prun output goes to a per-dataset log file
printf "${DS} \n\t-> ${OUT_DS}\n"
${DRYRUN} prun --exec "${SCRIPT} %IN -c ${CONFIG_FILE}"\
--outDS ${OUT_DS} --inDS ${DS}\
--useAthenaPackages --inTarBall=${ZIP}\
--mergeScript="hdf5-merge-nolock -o %OUT -i %IN"\
--outputs output.h5\
--noEmail \
${EXTRA_ARGS} > ${OUT_DS}.log 2>&1
)
# we have to export some environment variables so xargs can read them
# (each submit-job runs in a fresh bash -c child)
export -f submit-job
export CONFIG GRID_NAME JOB_ID ZIP AtlasBuildStamp SCRIPT DRYRUN EXTRA_ARGS ANNOTATE
# use xargs to submit all these jobs in batches of 10
printf "%s\n" ${INPUT_DATASETS[*]} | xargs -P 10 -I {} bash -c "submit-job {}"
echo $BREAK
echo "Submission successful"
######################################################
# Push tags if necessary
######################################################
if [[ ${TAG} && ${PUSH_TAG} != 0 ]]; then
echo "Pushing tag ${GIT_TAG}"
# NOTE(review): we are still inside SUBMIT_DIR at this point, so the
# working-directory-relative BASE is reached through ../ — confirm
cd ../${BASE}
ORIGIN_URL=$(git remote get-url origin)
if [[ $ORIGIN_URL != *"$GRID_NAME"* ]]; then
echo "ERROR: Remote URL for \"origin\" is not a fork, please submit from a fork" >&2
exit 1
fi
${DRYRUN} git push -q origin ${GIT_TAG}
cd ${WD}
fi
echo $BREAK
mc20_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.deriv.DAOD_PHYSVAL.e6337_s3681_r13167_p4931
mc20_13TeV.800030.Py8EG_A14NNPDF23LO_flatpT_Zprime_Extended.deriv.DAOD_PHYSVAL.e7954_s3681_r13144_p4931
\ No newline at end of file
# Samples from tuning campaign, see
#
# https://its.cern.ch/jira/browse/ATLMCPROD-9424
#
mc16_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.recon.AOD.e6337_e5984_s3126_d1677_r12711
mc16_13TeV.427080.Pythia8EvtGen_A14NNPDF23LO_flatpT_Zprime.recon.AOD.e5362_e5984_s3126_d1677_r12711
#
# Similar samples from jet tuning campaign, see
#
# https://its.cern.ch/jira/browse/ATR-22610
# https://its.cern.ch/jira/browse/ATLMCPROD-9423
#
# JZ1 to JZ4 is probably pretty ok, based on the slicing here:
#
# https://gitlab.cern.ch/atlas-physics/pmg/infrastructure/mc15joboptions/-/blob/ffe400a1b619910790f3c64825ac82a0139f95bf/common/Filters/JetFilter_JZX_Fragment.py
mc16_13TeV.800036.Py8EG_A14N23LO_jetjet_JZ1WwithSW.recon.AOD.e7914_s3126_d1677_r12711
mc16_13TeV.364702.Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ2WithSW.recon.AOD.e7142_s3126_d1677_r12711
mc16_13TeV.364703.Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ3WithSW.recon.AOD.e7142_s3126_d1677_r12711
mc16_13TeV.364704.Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ4WithSW.recon.AOD.e7142_s3126_d1677_r12711
# mc16_5TeV.800893.Py8EG_A14N23LO_jetjet_JZ1WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
# mc16_5TeV.800894.Py8EG_A14N23LO_jetjet_JZ2WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
# mc16_5TeV.800895.Py8EG_A14N23LO_jetjet_JZ3WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
# mc16_5TeV.800896.Py8EG_A14N23LO_jetjet_JZ4WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
mc16_5TeV.800897.Py8EG_A14N23LO_jetjet_JZ5WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
\ No newline at end of file
mc15_14TeV.800030.Py8EG_A14NNPDF23LO_flatpT_Zprime_Extended.recon.AOD.e8185_s3654_s3657_r12574
mc15_14TeV.600012.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.recon.AOD.e8185_s3654_s3657_r12573
mc15_14TeV.800030.Py8EG_A14NNPDF23LO_flatpT_Zprime_Extended.recon.AOD.e8185_s3654_s3657_r12440
\ No newline at end of file
......@@ -36,4 +36,3 @@ _voms_proxy_long ()
if ! _voms_proxy_long; then return 1; fi
if ! lsetup panda -q; then return 1; fi
if ! lsetup git -q; then return 1; fi
#!/usr/bin/env bash
# Grid submission script. This script should not be sourced, we don't
# need anything in here to propagate to the surrounding environment.
#
# $- contains 'i' only in an interactive shell, i.e. when sourced.
if [[ $- == *i* ]] ; then
echo "Don't source me!" >&2
return 1
else
# set the shell to exit if there's an error (-e), and to error if
# there's an unset variable (-u)
set -eu
fi
##########################
# Real things start here #
##########################
#
# part 0 is parsing arguments
#
# Some default values
#
# number of parallel submissions (overridden with -p)
P=10
#
MODE=default
#
# Add some mode switches: default config file and executable per mode
declare -A DEFAULT_CONFIGS_BY_MODE=(
[default]=upgrade-HI.json
)
declare -A SCRIPTS_BY_MODE=(
[default]=ca-dump-upgrade-HI
)
#
# Datasets to run over when no -i file is supplied
INPUT_DATASETS=(
# mc16_5TeV.800893.Py8EG_A14N23LO_jetjet_JZ1WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
# mc16_5TeV.800894.Py8EG_A14N23LO_jetjet_JZ2WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
# mc16_5TeV.800895.Py8EG_A14N23LO_jetjet_JZ3WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
# mc16_5TeV.800896.Py8EG_A14N23LO_jetjet_JZ4WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
mc16_5TeV.800897.Py8EG_A14N23LO_jetjet_JZ5WithSW_bfilter.merge.AOD.e8366_d1521_r11472_r11217
)
function _help() {
# Print usage, the option list, the run-mode table, and the datasets
# that the current arguments would run over.
local DEF_CFG_DEF_MODE=${DEFAULT_CONFIGS_BY_MODE[$MODE]}
cat <<EOF
usage: ${0##*/} [-h] [options] [CONFIG_FILE=${DEF_CFG_DEF_MODE}]
Submit a job to the grid! The CONFIG_FILE is json, and should live
under /configs in this package. Note that this script will ask you to
commit your changes, but it's a good idea to tag them as well.
Options:
-f: Force, don't require changes to be committed.
-p <n>: number of parallel submissions to run (default $P)
-d: Dry run, don't submit anything or make a tarball, but build the
submit directory.
-i <file>: File listing input files, will override the default list.
-t: run a test job, only one file per dataset
-a <tag>: annotate output with an additional tag
-m <mode>: set run mode (options: ${!SCRIPTS_BY_MODE[@]})
Run mode options:
EOF
local M
local F1=" " # <-- padding for the first field
local F2=" " # <-- padding for the second field
# ${F1:${#M}} is the padding string minus the width of $M, which
# left-aligns the columns in the mode table
for M in ${!DEFAULT_CONFIGS_BY_MODE[@]} ; do
local CFG=${DEFAULT_CONFIGS_BY_MODE[$M]}
local SCR=${SCRIPTS_BY_MODE[$M]}
echo " $M${F1:${#M}} ==> config: $CFG${F2:${#CFG}} script: $SCR"
done
cat <<EOF
With the current arguments, will run over:
EOF
local F
for F in ${INPUT_DATASETS[@]} ; do
echo $F
done
}
FORCE=''
# this might be 'echo' to do a dry run
PRE=''
EXTRA_ARGS=''
# any additional dataset names. Note that anyone who adds to this has
# to provide the leading `.`
ANNOTATE=''
# leading ':' selects silent error reporting; the \? and : cases print
# our own diagnostics (previously bad options were silently ignored)
while getopts ":hfp:di:ta:m:" opt $@;
do
case $opt in
h) _help; exit 1;;
f) FORCE=1 ;;
p) P=${OPTARG} ;;
d) PRE="echo DRY-RUNNING: " ;;
# read one dataset name per line from the given file
i) readarray -t INPUT_DATASETS < ${OPTARG} ;;
t) EXTRA_ARGS+=' --nFiles 1 '; ANNOTATE+=.test ;;
a) ANNOTATE+=.${OPTARG} ;;
m) MODE=${OPTARG} ;;
# handle errors
\?) echo "Unknown option: -$OPTARG" >&2; exit 1;;
:) echo "Missing argument for -$OPTARG" >&2; exit 1;;
esac
done
shift $((OPTIND-1))
###################################################
# Part 1: variables you _might_ need to change
###################################################
#
# User's grid name: prefer RUCIO_ACCOUNT, fall back to USER
GRID_NAME=${RUCIO_ACCOUNT-${USER}}
#
######################################################
# Part 2: variables you probably don't have to change
######################################################
#
# Build a zip of the files we're going to submit
ZIP=job.tgz
#
# This is the subdirectory we submit from
SUBMIT_DIR=submit
#
# This is where all the source files are (the repository root,
# resolved from this script's own location)
BASE=$(dirname $(readlink -e ${BASH_SOURCE[0]}))/../..
#
# Configuration file stuff
DEFAULT_CONFIG=${DEFAULT_CONFIGS_BY_MODE[$MODE]}
DEFAULT_CONFIG_PATH=${BASE}/configs/single-b-tag/${DEFAULT_CONFIG}
#
# The executable
EXE=${SCRIPTS_BY_MODE[$MODE]}
#
# Check that we don't have uncontrolled changes. The command
# substitution yields a timestamp when -f forces a dirty tree, the
# `git describe` output otherwise, and fails (killing the script via
# set -e) on uncommitted changes without -f.
export BASE
GIT_TAG=$(
cd $BASE
if ! git diff-index --quiet HEAD; then
if [[ $FORCE ]]; then
date +%F-T%H%M%S
exit 0
fi
echo "ERROR: uncommitted changes, please commit them" >&2
exit 1
fi
git describe
)
###################################################
# Part 3: prep the submit area
###################################################
#
echo "preping submit area"
if [[ -d ${SUBMIT_DIR} ]]; then
echo "removing old submit directory"
rm -rf ${SUBMIT_DIR}
fi
mkdir ${SUBMIT_DIR}
CONFIG_PATH=${1-${DEFAULT_CONFIG_PATH}}
echo "using config file ${CONFIG_PATH}"
cp ${CONFIG_PATH} ${SUBMIT_DIR}
# make sure we send files that the configuration depends on too