Commit f71aa026 authored by Martino Tanasini

add retagging on the grid

parent ec569641
Pipeline #3324672 passed with stages in 9 minutes and 9 seconds
......@@ -128,5 +128,6 @@ atlas_install_scripts(
grid/submit-single-btag
grid/submit-trigger
grid/submit-hbb
+grid/submit-retag
)
......@@ -16,11 +16,13 @@ from GaudiKernel.Configurable import DEBUG, INFO
from argparse import ArgumentParser
+from itertools import chain
def get_args():
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('input_files', nargs='+')
-    parser.add_argument('-o','--output', default='test.h5')
+    parser.add_argument('-o','--output', default='output.h5', **dh)
    parser.add_argument('-c','--config-file', required=True)
    parser.add_argument('-m','--max-events', type=int, nargs='?', const=10)
    parser.add_argument('-d','--debug', action='store_true')
......@@ -80,7 +82,9 @@ def setupCondDb(cfgFlags, taggerlist):
def run():
    args = get_args()
-    cfgFlags.Input.Files = args.input_files
+    cfgFlags.Input.Files = list(
+        chain.from_iterable(f.split(',') for f in args.input_files))
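    # e.g. 'a.root,b.root c.root' on the command line ends up as the
    # file list ['a.root', 'b.root', 'c.root']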
    if args.max_events:
        cfgFlags.Exec.MaxEvents = args.max_events
......
#!/usr/bin/env bash
# This script should not be sourced: we don't need anything in here to
# propagate to the surrounding environment.
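# ($- contains 'i' when the current shell is interactive, which here
# means the file was sourced from a terminal rather than executed.)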
if [[ $- == *i* ]] ; then
echo "Don't source me!" >&2
return 1
else
# set the shell to exit if there's an error (-e), and to error if
# there's an unset variable (-u)
set -eu
fi
##########################
# Real things start here #
##########################
###################################################
# Part 1: variables you _might_ need to change
###################################################
#
# User's grid name
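# (falls back to ${USER} if RUCIO_ACCOUNT is unset)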
GRID_NAME=${RUCIO_ACCOUNT-${USER}}
#
# This job's tag (the default below combines the date, time, and a
# random number so each submission gets a unique name)
BATCH_TAG=$(date +%F-T%H%M%S)-R${RANDOM}
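# e.g. 2021-09-01-T142530-R12345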
# BATCH_TAG=v0
DEFAULT_CONFIG=EMPFlowGNN.json
# R21 samples
#INPUT_DATASETS=(
# mc16_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.deriv.DAOD_FTAG1.e6337_s3126_r9364_p3985
# mc16_13TeV.427080.Pythia8EvtGen_A14NNPDF23LO_flatpT_Zprime.deriv.DAOD_FTAG1.e5362_s3126_r9364_p3985
# mc16_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.deriv.DAOD_FTAG1.e6337_s3126_r10201_p3985
# mc16_13TeV.427080.Pythia8EvtGen_A14NNPDF23LO_flatpT_Zprime.deriv.DAOD_FTAG1.e5362_s3126_r10201_p3985
# mc16_13TeV.427081.Pythia8EvtGen_A14NNPDF23LO_flatpT_Zprime_Extended.deriv.DAOD_FTAG1.e6928_e5984_s3126_r10201_r10210_p3985
# mc16_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.deriv.DAOD_FTAG1.e6337_s3126_r10724_p3985
# mc16_13TeV.427080.Pythia8EvtGen_A14NNPDF23LO_flatpT_Zprime.deriv.DAOD_FTAG1.e5362_s3126_r10724_p3985
#)
# R22 validation samples from ATR-23018
INPUT_DATASETS=(
mc16_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.deriv.DAOD_PHYSVAL.e6337_e5984_s3126_r12629_p4724
)
######################################################
# Part 2: variables you probably don't have to change
######################################################
#
# Name of the tarball of local files that we'll build and submit below
ZIP=job.tgz
#
# This is the subdirectory we submit from
SUBMIT_DIR=submit
#
# This is where all the source files are
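# (two directories above this script, resolved to an absolute path by
# readlink -e)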
BASE=$(dirname $(readlink -e ${BASH_SOURCE[0]}))/../..
#
# Configuration file stuff
DEFAULT_CONFIG_PATH=${BASE}/configs/single-b-tag/${DEFAULT_CONFIG}
###################################################
# Part 3: prep the submit area
###################################################
#
echo "preping submit area"
if [[ -d ${SUBMIT_DIR} ]]; then
echo "removing old submit directory"
rm -rf ${SUBMIT_DIR}
fi
mkdir ${SUBMIT_DIR}
# create fragments dir in submission dir for *variables.json and *cuts.json
SUBMIT_DIR_FRAGMENTS=${SUBMIT_DIR}/fragments
mkdir -p ${SUBMIT_DIR_FRAGMENTS}
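# Use the first command line argument as the config file if one was
# given, otherwise fall back to the default.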
CONFIG_PATH=${1-${DEFAULT_CONFIG_PATH}}
echo "using config file ${CONFIG_PATH}"
cp ${CONFIG_PATH} ${SUBMIT_DIR}
# make sure we send files that the configuration depends on too
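# (${DEFAULT_CONFIG_PATH%/*} is the directory holding the default
# config, i.e. the single-b-tag configs directory)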
cp ${DEFAULT_CONFIG_PATH%/*}/fragments/*.json ${SUBMIT_DIR_FRAGMENTS}
cd ${SUBMIT_DIR}
##########################################
# Part 4: build a tarball of the job
###########################################
#
# Check to make sure you've properly set up the environment: if you
# haven't sourced the setup script in the build directory the grid
# submission will fail, so we check here before doing any work.
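# (dump-single-btag is just a convenient installed script to test for;
# any executable from this package would do)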
if ! type dump-single-btag &> /dev/null ; then
echo "You haven't sourced x86*/setup.sh, job will fail!" >&2
echo "quitting..." >&2
exit 1
fi
#
echo "making tarball of local files: ${ZIP}" >&2
#
# The --outTarBall, --noSubmit, and --useAthenaPackages arguments are
# important. The --outDS and --exec don't matter at all here, they are
# just placeholders to keep PanDA from complaining.
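# (--useAthenaPackages includes the locally built Athena packages in
# the tarball so the grid job can run them)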
prun --outTarBall=${ZIP} --noSubmit --useAthenaPackages\
--exec "ls"\
--outDS user.${GRID_NAME}.x
##########################################
# Part 5: loop over datasets and submit
##########################################
# Loop over all inputs
echo "submitting for ${#INPUT_DATASETS[*]} datasets"
#
for DS in ${INPUT_DATASETS[*]}
do
# This regex extracts the DSID from the input dataset name, so
# that we can give the output dataset a unique name. It's not
# pretty: ideally we'd just suffix our input dataset name with
# another tag. But thanks to the insanely long job options names we
# use in the generation stage we're running out of space for
# everything else.
DSID=$(sed -r 's/[^\.]*\.([0-9]{6,8})\..*/\1/' <<< ${DS})
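# e.g. for the dataset above this picks out 410470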
#
# Build the full output dataset name
CONFIG_FILE=${CONFIG_PATH##*/}
TAGS=$(cut -d . -f 6 <<< ${DS}).${CONFIG_FILE%.*}
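# ${CONFIG_PATH##*/} strips the directories and ${CONFIG_FILE%.*} drops
# the .json extension, so with the default config and the dataset above
# TAGS would be e6337_e5984_s3126_r12629_p4724.EMPFlowGNN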
OUT_DS=user.${GRID_NAME}.${DSID}.btagTraining.${TAGS}.${BATCH_TAG}
#
# Now submit.
#
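# --outputs must match the file the dump job writes; output.h5 is the
# default output name set in the dumper above. --inTarBall reuses the
# tarball built in part 4 rather than re-archiving the work area for
# every dataset.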
echo "Submitting for ${GRID_NAME} on ${DS} -> ${OUT_DS}"
prun --exec "ca-dump-retag %IN -c ${CONFIG_FILE}"\
--outDS ${OUT_DS} --inDS ${DS}\
--useAthenaPackages --inTarBall=${ZIP}\
--outputs output.h5\
--noEmail > ${OUT_DS}.log 2>&1 &
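# prun is backgrounded so all datasets are submitted in parallel; the
# sleep staggers the submissions a little and the 'wait' below blocks
# until every submission has finished.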
sleep 1
done
wait