Skip to content
Snippets Groups Projects

Qa build cms mlpf

7 files
+ 119
65
Compare changes
  • Side-by-side
  • Inline
Files
7
@@ -9,74 +9,134 @@ else FLAVOR="unknown";
fi
# Default config
# NOTE(review): this text is a rendered diff, so some settings appear twice
# (NEPOCHS, BSIZE). If the lines were run top to bottom, the later assignment
# of each pair is the one that would remain in effect.
NEPOCHS=1
NEPOCHS=2
NTRAIN=0 # 0 is None
NTEST=0 # 0 is None
BSIZE=0 # 0 is Default
BSIZE=4 # 0 is Default
NDEVICES=0 # 0 is Default
# Two spellings of the results directory variable are present (RESULTSDIR and
# resultsDir); both start out as /results.
RESULTSDIR=/results
# Set to 1 by the -d/--debug option in parse_args.
DEBUG=0
resultsDir="/results"
# Set to 1 by the -W option in parse_args; selects where the summary json goes.
skipSubDir=0
# Set by the -m/--mop option in parse_args.
MOP="none"
# NOTE(review): DESCRIPTION is not read anywhere in the visible text — confirm
# whether it is consumed elsewhere.
DESCRIPTION="Machine Learning Particle Flow (MLPF) benchmark"
# log [error|info|silent] <message...>
# Writes a message to out.log in the results directory; the first argument
# selects the mode:
#   error   red ">>> ERROR:" prefix, printed and appended to the log, then exit 2
#   info    blue text, printed and appended to the log
#   silent  appended to the log only, nothing printed
#   other   all arguments (including the first) printed and appended as-is
# NOTE(review): every arm appears twice, once writing under $RESULTSDIR and
# once under $resultsDir, because this text is a rendered diff. As shown, the
# first `*)` arm ends with a single `;` and is followed directly by another
# pattern, so the case statement would not parse without dropping one set.
log() {
case $1 in
error) shift 1; echo -e "\e[31m>>> ERROR:\e[0m $*\n" | tee -a $RESULTSDIR/out.log ; exit 2 ;;
info) shift 1; echo -e "\e[34m$*\e[0m\n" | tee -a $RESULTSDIR/out.log ;;
silent) shift 1; echo "$*" >> $RESULTSDIR/out.log ;;
*) echo "$*" | tee -a $RESULTSDIR/out.log ;
error) shift 1; echo -e "\e[31m>>> ERROR:\e[0m $*\n" | tee -a $resultsDir/out.log ; exit 2 ;;
info) shift 1; echo -e "\e[34m$*\e[0m\n" | tee -a $resultsDir/out.log ;;
silent) shift 1; echo "$*" >> $resultsDir/out.log ;;
*) echo "$*" | tee -a $resultsDir/out.log ;
esac
}
# Print command-line help and terminate the script.
# NOTE(review): this span is a rendered diff holding two definitions. The
# first one (opened with `usage() {`) has no closing brace before the second
# one (`function usage(){`) begins, so the two cannot coexist as written.
# First variant: prints a short option list with current defaults and exits 0.
usage() {
echo "Usage: $0 [options]"
echo "Options:"
echo "-h, --help Prints this message and exit."
echo "-w, --resultsdir <str> Results directory. Default: $RESULTSDIR"
echo "-e, --nepochs <int> Number of epochs. Default: $NEPOCHS"
echo "-B, --batch_size <int> Batch size per device. Default: $BSIZE"
echo "-D, --num_devices <int> Number of devices to use. Default: $NDEVICES"
echo " --ntrain <int> Train steps limit. Default: $NTRAIN"
echo " --ntest <int> Test steps limit. Default: $NTEST"
exit 0
# Second variant: prints a synopsis, per-option help with the current values,
# the mop modes, the results directory layout, optional detailed usage and the
# benchmark description, then exits with status 2.
function usage(){
echo ""
# NOTE(review): the synopsis advertises -c/--copies, but neither getopt spec
# nor any case arm in parse_args handles that option.
echo "Usage: $0 [-w | --resultsdir <resultsDir>] [-W] [-c | --copies <NCOPIES>] [-n | --nepochs <NEPOCHS>] " \
"[-B | --batch_size <BSIZE>] [-D | --num_devices <NDEVICES>] [--ntrain <NTRAIN>] [--ntest <NTEST>] " \
"[-m | --mop <mode>] [-d | --debug] [-h | --help]"
echo " -w --resultsdir <resultsDir> : (path) results directory (default: /results , current: $resultsDir)"
echo " -W : store results in <resultsDir> directly"
# NOTE(review): the "default:" figures below are literal text, not read from
# variables. The config section of this file also shows NEPOCHS=2 and BSIZE=4,
# which disagree with "default: 1" and "default: 0" — confirm.
echo " -n --nepochs : (int) Number of epochs (default: 1, current: $NEPOCHS)"
echo " -B --batch_size : (int) Batch size per device (default: 0, current: $BSIZE)"
echo " -D --num_devices : (int) Number of devices to use (default: 0, current: $NDEVICES)"
echo " --ntrain : (int) Train steps limit (default: 0, current: $NTRAIN)"
echo " --ntest : (int) Test steps limit (default: 0, current: $NTEST)"
echo " -m --mop : (none|all|custom) clean working directory mode: none/all/custom (current: $MOP)"
echo " -d --debug : debug mode"
echo " -h --help : display this help and exit"
echo ""
echo "Mop mode:
none == do not remove working files,
all == remove all produced files (but summary json),
custom == custom implementation"
echo "Without -W (default): results are stored in a new subdirectory of <resultsDir>:"
echo " <resultsDir>/<uniqueid>/*.json"
echo " <resultsDir>/<uniqueid>/proc_1/*.log"
echo " <resultsDir>/<uniqueid>/proc_.../*.log"
echo " <resultsDir>/<uniqueid>/proc_<COPIES>/*.log"
echo "With -W (e.g. in the CI): results are stored in <resultsDir> directly:"
echo " <resultsDir>/*.json"
echo " <resultsDir>/proc_1/*.log"
echo " <resultsDir>/proc_.../*.log"
echo " <resultsDir>/proc_<NCOPIES>/*.log"
echo ""
echo "Without -w (default) and without -W: <resultsDir> is /results"
echo "Without -w (default) and with -W: <resultsDir> is a tmp directory /tmp/xxxx"
echo ""
# Run usage_detailed only when something has defined it as a shell function;
# no such function is defined in the visible text.
if [ "$(type -t usage_detailed)" == "function" ]; then
echo -e "\nDetailed Usage:\n----------------\n"
( usage_detailed ) # as a subprocess, just in case this has a 0 exit code...
fi
echo -e "DESCRIPTION\n"
# Print the DESCRIPTION file found under $BMKDIR when it exists. BMKDIR is not
# assigned anywhere in the visible text — presumably provided by the caller or
# the environment; verify.
if [ -e $BMKDIR/DESCRIPTION ]; then
cat $BMKDIR/DESCRIPTION
else
echo "Sorry there is no description included."
fi
echo ""
exit 2 # early termination (help or invalid arguments to benchmark script)
}
# parse_args <command-line arguments...>
# Normalizes the arguments with getopt, then walks them and stores each
# recognized option into the matching global (NEPOCHS, NTRAIN, NTEST, BSIZE,
# NDEVICES, DEBUG, MOP, skipSubDir, RESULTSDIR/resultsDir). On a getopt failure
# it prints a message to stderr and calls usage.
# NOTE(review): this span is a rendered diff; the getopt call, the `case`
# header and most option arms appear twice (older form first, newer second).
parse_args() {
# NOTE(review): in this older spec only B carries a colon in the short option
# string (wWeDB:h), so -w, -e and -D are declared without an argument even
# though their arms read $2.
options=$(getopt --long resultsdir:,nepochs:,ntrain:,ntest:,batch_size:,num_devices:,help -o wWeDB:h -- "$@")
# NOTE(review): the long option `mop` has no trailing colon here, while the
# short form is declared as `m:` and the --mop arm below consumes $2 — the long
# form looks like it should be `mop:`; confirm.
options=$(getopt -a -n cms-mlpf-bmk -o w:Wm:n:dD:B:h --long resultsdir:,nepochs:,ntrain:,ntest:,batch_size:,num_devices:,debug,help,mop -- "$@")
if [ $? != 0 ]; then echo "Invalid options provided." >&2; usage; fi
# Replace the positional parameters with getopt's normalized output.
eval set -- "$options"
while true; do
case $1 in
case "$1" in
# Both usage variants in this file end by calling exit themselves, so the
# `exit 0` after the call is not reached.
--help | -h ) usage; exit 0;;
# Arms that take a value read it from $2 and shift once here; the shared
# `shift` after esac then drops the option name itself.
--resultsdir | -w ) RESULTSDIR=$2; shift ;;
--ntrain ) NTRAIN=$2; shift ;;
--ntest ) NTEST=$2; shift ;;
--nepochs | -e ) NEPOCHS=$2; shift ;;
--num_devices | -D ) NDEVICES=$2; shift ;;
--batch_size | -B ) BSIZE=$2; shift ;;
--debug | -d ) DEBUG=1 ;;
-W ) skipSubDir=1;;
--mop | -m ) MOP="$2"; shift;;
--resultsdir | -w ) resultsDir="$2"; shift;;
--ntrain ) NTRAIN="$2"; shift;;
--ntest ) NTEST="$2"; shift;;
--nepochs | -n ) NEPOCHS="$2"; shift;;
--num_devices | -D ) NDEVICES="$2"; shift;;
--batch_size | -B ) BSIZE="$2"; shift;;
# End-of-options marker emitted by getopt: stop parsing.
-- ) shift; break;;
esac
shift
done
}
# Main sequence: parse options, reset the log, pick the GPUs, run the MLPF
# training pipeline and read back its result file.
# NOTE(review): this span is a rendered diff, so several statements appear in
# both their older ($RESULTSDIR) and newer ($resultsDir) form.
# TODO: implement MOP, DEBUG
# NOTE(review): unquoted $* re-splits arguments on whitespace, so a value that
# contains spaces (e.g. a results path) is broken into several words before
# getopt sees it; "$@" would keep each argument intact.
parse_args $*
# Start from a fresh out.log in the results directory.
if [ -f "$RESULTSDIR"/out.log ]; then rm "$RESULTSDIR"/out.log; fi
log info "Base working directory: $RESULTSDIR"
log info "Running benchmark MLPF"
if [ -f "$resultsDir"/out.log ]; then rm "$resultsDir"/out.log; fi
log info "Base working directory: $resultsDir"
# set CUDA_VISIBLE_DEVICES for tensorflow based on nvidia-smi (dirty nvidia-only check)
# The number of lines printed by `nvidia-smi -L` is used as the GPU count, and
# CUDA_VISIBLE_DEVICES becomes the comma-separated list 0..count-1.
if type -P "nvidia-smi" &>/dev/null; then
DEVICES=$(nvidia-smi -L | wc -l)
log info "Detected $DEVICES nvidia GPUs"
export CUDA_VISIBLE_DEVICES=$(seq -s, 0 $(($DEVICES-1)))
fi
# NOTE(review): two different working directories are entered in this span
# (/workspace/particleflow/ here and /bmk/cms-mlpf/particleflow/ further down);
# presumably one of them is the removed side of the diff — confirm which.
cd /workspace/particleflow/
log info "Running benchmark MLPF"
# Record the exact training command in out.log only (not on the terminal).
log silent "Executing 'python3 mlpf/pipeline.py train \
--config parameters/delphes-benchmark.yaml \
--prefix /tmp/train_ \
--plot-freq 1000000 \
--benchmark_dir $resultsDir \
--num_devices $NDEVICES \
--batch_size $BSIZE \
--nepochs $NEPOCHS \
--ntrain $NTRAIN \
--ntest $NTEST'"
cd /bmk/cms-mlpf/particleflow/
# Run the training pipeline with the parsed settings. As rendered,
# --benchmark_dir is passed twice (older and newer variable).
python3 mlpf/pipeline.py train \
--config parameters/delphes-benchmark.yaml \
--prefix /tmp/train_ \
--plot-freq 1000000 \
--benchmark_dir $RESULTSDIR \
--benchmark_dir $resultsDir \
--num_devices $NDEVICES \
--batch_size $BSIZE \
--nepochs $NEPOCHS \
--ntrain $NTRAIN \
--ntest $NTEST
# Load result.json from the results directory into REPORT. Presumably the
# pipeline writes that file and generate_json consumes REPORT; neither is
# visible here — verify.
REPORT=$(cat $RESULTSDIR/result.json)
REPORT=$(cat $resultsDir/result.json)
generate_json() {
jq -n \
@@ -95,6 +155,11 @@ generate_json() {
}
}'
}
# Write the summary json produced by generate_json and log completion.
# NOTE(review): this span is a rendered diff; the first two statements use the
# older $RESULTSDIR/mlpf-report.json target, the rest use REPORT_PATH.
generate_json > $RESULTSDIR/mlpf-report.json
log info "Finished running MLPF"
# The report/ subdirectory is created unconditionally, including when -W is
# given and the summary is written next to it rather than inside it.
mkdir -p $resultsDir/report
# skipSubDir is 0 unless -W was passed: without -W the summary goes under
# report/, with -W it goes directly into the results directory.
# NOTE(review): the help text describes a <resultsDir>/<uniqueid>/ layout for
# the non -W case, which differs from the report/ path used here — confirm.
if [ $skipSubDir -eq 0 ]; then
REPORT_PATH=$resultsDir/report/cms-mlpf_summary.json
else
REPORT_PATH=$resultsDir/cms-mlpf_summary.json
fi
generate_json > $REPORT_PATH
log info "Finished running MLPF. Final report written to $REPORT_PATH"
Loading