From a276422e8cea6e9b5884bc6ffea3490bd4f094ab Mon Sep 17 00:00:00 2001
From: Domenico Giordano <domenico.giordano@cern.ch>
Date: Tue, 30 Jun 2020 20:37:28 +0200
Subject: [PATCH 01/74] added patatrack

---
 cms/patatrack/Dockerfile.nvidia                 |  83 ++++
 cms/patatrack/cms-patatrack.spec                |   7 +
 cms/patatrack/cms-patatrack/DESCRIPTION         |   4 +
 cms/patatrack/cms-patatrack/bmk-driver.sh       | 433 ++++++++++++++++++
 cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh|  56 +++
 cms/patatrack/cms-patatrack/cms-reco-bmk.sh     |  93 ++++
 cms/patatrack/cms-patatrack/cvmfs/.keepme       |   0
 cms/patatrack/cms-patatrack/data/.keepme        |   0
 cms/patatrack/cms-patatrack/parseResults.sh     |  67 +++
 cms/patatrack/cms-patatrack/prepare-patch.sh    |  26 ++
 cms/patatrack/cms-patatrack/test_parser.sh      |   2 +
 11 files changed, 771 insertions(+)

diff --git a/cms/patatrack/Dockerfile.nvidia b/cms/patatrack/Dockerfile.nvidia
new file mode 100644
index 0000000..66c4928
--- /dev/null
+++ b/cms/patatrack/Dockerfile.nvidia
FROM nvidia/cuda:10.1-devel-centos7

# Install OS packages, EPEL and python2-pip in a single layer and clean the
# yum cache in the same layer, so that the cache does not persist in the image.
# FIX: use '&&' (not ';') so that a failed install aborts the build instead of
# being masked by the final 'yum clean all'.
RUN yum install -y \
        which \
        man \
        file \
        util-linux \
        gcc \
        wget \
        tar freetype \
        perl perl-Data-Dumper \
        patch git vim \
    && yum --enablerepo=extras install -y epel-release \
    && yum install -y python2-pip \
    && yum clean all

# Python dependencies for the benchmark tooling (python2 image: pip is pip2).
RUN pip install --upgrade pip \
    && pip install numpy scipy

# Prepare a data directory for downloading large files that should normally be cacheable (BMK-159)
# Its contents should be retrieved in Dockerfile.append, before /bmk/<bmkdir> is copied over
# Each file it contains is then individually symlinked to /bmk/<bmkdir>/data/<file> in Dockerfile.template
RUN mkdir -p /bmk/data

# Add here any workload-specific Dockerfile instructions.
# They will be appended to the Dockerfile generated from a common template.

# Download and extract the Patatrack dataset.
# FIX: chain with '&&' so that a failed download fails the build instead of
# silently producing an image without input data.
RUN echo -e "\nExtracting Patatrack dataset..." \
    && wget -q https://hep-benchmarks.web.cern.ch/hep-benchmarks/hep-workloads/data/cms/patatrack/opendata.tar -O /bmk/data/opendata.tar \
    && cd /bmk/data/ \
    && tar -xvf ./opendata.tar

# ********* DOCKERFILE TEMPLATE start *********
# ******* PLEASE DO NOT EDIT THIS FILE! *******
# This is the common template for all HEP workloads (BMK-124 and BMK-159).
# Please add workload-specific instructions in Dockerfile.append.

# Optionally allow disabling the cache only from this point onwards if using
# docker build -t your-image --build-arg CACHEBUST=$(date +%s) .
# See https://github.com/moby/moby/issues/1996#issuecomment-185872769
###ARG CACHEBUST=1
###RUN echo CACHEBUST=$CACHEBUST

# This should normally contain always the same files and be cacheable (BMK-159)
COPY ./cvmfs /cvmfs

RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar

# This should normally contain always the same files and be cacheable (BMK-159)
RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar

# This may also be cacheable in most cases except when /bmk contents change
COPY ./cms-patatrack /bmk/./cms-patatrack

# FIX: use an absolute path - the default WORKDIR is '/', where the relative
# path './cms-patatrack/prepare-patch.sh' does not exist (it was copied to /bmk).
RUN /bmk/./cms-patatrack/prepare-patch.sh

#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh

RUN if [ ! -d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi

RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done

RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar

RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json

ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"]

# This contains provenance data that can never be cached
COPY ./cvmfs.provenance /cvmfs.provenance

# Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167)
# FIX: these must not be commented out - bmk-driver.sh's doOneWrapper executes
# 'su bmkuser' when running as root inside docker, which fails without this user.
RUN groupadd bmkuser
RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser

# ********* DOCKERFILE TEMPLATE end *********
diff --git a/cms/patatrack/cms-patatrack.spec b/cms/patatrack/cms-patatrack.spec
new file mode 100644
index 0000000..24be9b6
--- /dev/null
+++ b/cms/patatrack/cms-patatrack.spec
HEPWL_BMKEXE=cms-patatrack-bmk.sh
HEPWL_BMKOPTS="-t 4 -e 3"
HEPWL_BMKDIR=cms-patatrack
HEPWL_BMKDESCRIPTION="CMS PATATRACK, based on CMSSW_10_2_9"
HEPWL_DOCKERIMAGENAME=cms-patatrack-bmk
HEPWL_DOCKERIMAGETAG=v0.1
HEPWL_CVMFSREPOS=cms.cern.ch
diff --git a/cms/patatrack/cms-patatrack/DESCRIPTION b/cms/patatrack/cms-patatrack/DESCRIPTION
new file mode 100644
index 0000000..d3a74fb
--- /dev/null
+++ b/cms/patatrack/cms-patatrack/DESCRIPTION
THIS IS FOR CMS-RECO. FIX for patatrack
Reconstruction and analysis data creation.
The application is multi-threaded and requires an input data file containing simulated events.
+The score consists of throughput (events per second) and CPU (CPU seconds per event). \ No newline at end of file diff --git a/cms/patatrack/cms-patatrack/bmk-driver.sh b/cms/patatrack/cms-patatrack/bmk-driver.sh new file mode 100644 index 0000000..b119f06 --- /dev/null +++ b/cms/patatrack/cms-patatrack/bmk-driver.sh @@ -0,0 +1,433 @@ +if [ "$BASH_SOURCE" = "$0" ]; then echo "ERROR! This script ($0) was not sourced"; exit 1; fi +if [ "$BASH_SOURCE" = "" ]; then echo "ERROR! This script was not sourced from bash"; return 1; fi + +bmkDriver=$(basename ${BASH_SOURCE}) +bmkScript=$(basename $0) +BMKDIR=$(cd $(dirname $0); pwd) + +function advertise_bmkdriver(){ + echo -e "\n========================================================================" + echo -e "[$bmkDriver] $(date) entering common benchmark driver" + echo -e "========================================================================\n" + echo -e "[$bmkDriver] entering from $bmkScript\n" + # Dump workload-specific directory + echo -e "[$bmkDriver] benchmark directory BMKDIR=${BMKDIR}:\n" + ls -lRt $BMKDIR + if [ -d $BMKDIR/../data ]; then + echo -e "\n[$bmkDriver] data directory ${BMKDIR}/../data:\n" + ls -lRt $BMKDIR/../data + fi + echo +} + +# Check that mandatory functions exist or load them otherwise +function check_mandatory_functions(){ + # Check that function doOne has been defined + if [ "$(type -t doOne)" != "function" ]; then + echo "[$bmkDriver] ERROR! Function 'doOne' must be defined in $bmkScript" # internal error (missing code) + exit 1; + fi + # Check that function parseResults has been defined, otherwise load it from parseResults.sh + if [ "$(type -t parseResults)" != "function" ]; then + echo "[$bmkDriver] load parseResults.sh (function 'parseResults' is not defined in $bmkScript)" + if [ -f ${BMKDIR}/parseResults.sh ]; then + echo -e "[$bmkDriver] sourcing ${BMKDIR}/parseResults.sh\n" + . 
${BMKDIR}/parseResults.sh + if [ "$(type -t parseResults)" != "function" ]; then + echo "[$bmkDriver] ERROR! Function 'parseResults' must be defined in $bmkScript or parseResults.sh" # internal error (missing code) + exit 1; + fi + else + echo -e "[$bmkDriver] ERROR! 'parseResults' not defined and ${BMKDIR}/parseResults.sh not found\n" # internal error (missing code) + exit 1 + fi + fi +} + +# Check that mandatory variables have been defined (default values) +function check_mandatory_variables(){ + # Variables NCOPIES, NTHREADS, NEVENTS_THREAD have default values specific to each benchmark + for var in NCOPIES NTHREADS NEVENTS_THREAD; do + if [ "${!var}" == "" ]; then + echo "[$bmkDriver] ERROR! A default value of $var must be set in $bmkScript" # internal error (missing code) + exit 1; + fi + done + echo +} + +# Variables USER_NCOPIES, USER_NTHREADS, USER_NEVENTS_THREAD are empty by default +USER_NCOPIES= +USER_NTHREADS= +USER_NEVENTS_THREADS= + +# Variable resultsDir has default value /results +# Variables skipSubDir and DEBUG are 0 by default +resultsDir=/results +skipSubDir=0 +DEBUG=0 + +function advertise_user_defined_variables(){ + for var in NCOPIES NTHREADS NEVENTS_THREAD; do + echo "Default (from $bmkScript): $var=${!var}" + done + echo + for var in USER_NCOPIES USER_NTHREADS USER_NEVENTS_THREAD; do + echo "Default (from $bmkDriver): $var=${!var}" + done + echo + for var in resultsDir skipSubDir DEBUG; do + echo "Default (from $bmkDriver): $var=${!var}" + done +} + +# Usage function +function usage(){ + echo "" + echo "Usage: $0 [-w <resultsDir>] [-W] [-c <NCOPIES>] [-t <NTHREADS>] [-e <NEVENTS_PER_THREAD>] [-d] [-h]" + echo " -w <resultsDir> : results directory (default: /results , current: $resultsDir)" + echo " -W : store results in <resultsDir> directly (default: 0 , current: $skipSubDir)" + echo " -c <NCOPIES> : # identical copies (default $NCOPIES)" + echo " -t <NTHREADS> : # threads (or processes, or threads*processes) per copy (default $NTHREADS)" 
+ echo " -e <NEVENTS_THREAD> : # events per thread (default $NEVENTS_THREAD)" + echo " -d : debug mode (current: $DEBUG)" + echo " -h : display this help and exit" + echo "" + if [ $NTHREADS -eq 1 ]; then + echo "NTHREADS : the default value NTHREADS=1 of this parameter cannot be changed" + echo " (single-threaded single-process workload application)" + echo "" + fi + echo "Without -W (default): results are stored in a new subdirectory of <resultsDir>:" + echo " <resultsDir>/<uniqueid>/*.json" + echo " <resultsDir>/<uniqueid>/proc_1/*.log" + echo " <resultsDir>/<uniqueid>/proc_.../*.log" + echo " <resultsDir>/<uniqueid>/proc_<COPIES>/*.log" + echo "With -W (e.g. in the CI): results are stored in <resultsDir> directly:" + echo " <resultsDir>/*.json" + echo " <resultsDir>/proc_1/*.log" + echo " <resultsDir>/proc_.../*.log" + echo " <resultsDir>/proc_<NCOPIES>/*.log" + echo "" + echo "Without -w (default) and without -W: <resultsDir> is /results" + echo "Without -w (default) and with -W: <resultsDir> is a tmp directory /tmp/xxxx" + echo "" + if [ "$(type -t usage_detailed)" == "function" ]; then + echo -e "\nDetailed Usage:\n----------------\n" + ( usage_detailed ) # as a subprocess, just in case this has a 0 exit code... + fi + echo -e "DESCRIPTION\n" + if [ -e $BMKDIR/DESCRIPTION ]; then + cat $BMKDIR/DESCRIPTION + else + echo "Sorry there is not description included." + fi + echo "" + exit 1 # early termination (help or invalid arguments to benchmark script) +} + +##################### +### HERE MAIN STARTS +##################### + +# Parse the input arguments +callUsage== +while getopts "c:t:e:w:Wdh" o; do + case ${o} in + c) + if [ $OPTARG -gt 0 ]; then + USER_NCOPIES=$OPTARG + else + echo "[$bmkDriver] ERROR! 
Invalid argument '-c $OPTARG' (must be > 0)" + exit 1 # early termination (invalid arguments to benchmark script) + fi + ;; + t) + if [ $OPTARG -gt 0 ]; then + USER_NTHREADS=$OPTARG + if [ $NTHREADS -eq 1 ] && [ $USER_NTHREADS -ne 1 ]; then + echo "[$bmkDriver] ERROR! Invalid argument '-t $OPTARG' (default NTHREADS=1 cannot be changed)" + exit 1 # early termination (invalid arguments to benchmark script) + fi + else + echo "[$bmkDriver] ERROR! Invalid argument '-t $OPTARG' (must be > 0)" + exit 1 # early termination (invalid arguments to benchmark script) + fi + ;; + e) + if [ $OPTARG -gt 0 ]; then + USER_NEVENTS_THREAD=$OPTARG + else + echo "[$bmkDriver] ERROR! Invalid argument '-e $OPTARG' (must be > 0)" + exit 1 + fi + ;; + w) + resultsDir=$OPTARG + ;; + W) + skipSubDir=1 + ;; + d) + DEBUG=1 + ;; + *) + callUsage=1 # need to do in this way to enable parsing of all arguments (see BMK-258) + ;; + esac +done + +if [ "$DEBUG" == 1 ]; then + echo -e "\n[$bmkDriver] Parse input arguments '$@'\n" + advertise_bmkdriver + advertise_user_defined_variables +fi + +# No other input arguments are expected +shift $((OPTIND -1)) +if [ "$1" != "" ]; then usage; fi + +if [ "$callUsage" == "1" ]; then usage; fi + +# Check that mandatory functions exist or load them otherwise +check_mandatory_functions + +# Check that mandatory variables have been defined (default values) +check_mandatory_variables + +# Dump all relevant variables after parsing the input arguments +for var in USER_NCOPIES USER_NTHREADS USER_NEVENTS_THREAD; do + echo "Current value: $var=${!var}" +done +echo +for var in resultsDir skipSubDir DEBUG; do + echo "Current value: $var=${!var}" +done +echo + +# Variable resultsDir must be set through command line options +# Backward compatibility: all benchmarks initially hardcoded 'RESULTS_DIR=/results' +if [ "${resultsDir}" == "" ]; then + ###echo "[$bmkDriver] ERROR! 
resultsDir not specified ('-w' missing)" + ###exit 1 # early termination (invalid arguments to benchmark script) + if [ "$skipSubDir" == "1" ]; then + echo -e "[$bmkDriver] WARNING! resultsDir not specified ('-w' missing), but '-W' is present: create a directory in /tmp\n" + resultsDir=$(mktemp -d) + else + echo -e "[$bmkDriver] WARNING! resultsDir not specified ('-w' missing) and '-W' is missing: assume '/results'\n" + resultsDir=/results + fi +fi + +# Check that resultsDir is an existing directory +if [ ! -d ${resultsDir} ]; then + mkdir -p ${resultsDir} + if [ "$?" != "0" ]; then + echo "[$bmkDriver] ERROR! directory '${resultsDir}' not found and could not be created" + exit 1 # early termination (cannot start processing) + fi +fi + +# Status code of the validateInputArguments and doOne steps +# fail<0 : validateInputArguments failed +# fail>0 : doOne failed +# fail=0 : OK +fail=0 + +# Call function validateInputArguments if it exists +if [ "$(type -t validateInputArguments)" != "function" ]; then + echo -e "[$bmkDriver] function 'validateInputArguments' not found: use input arguments as given\n" + if [ "$USER_NCOPIES" != "" ]; then NCOPIES=$USER_NCOPIES; fi + if [ "$USER_NTHREADS" != "" ]; then NTHREADS=$USER_NTHREADS; fi # already checked that USER_NTHREADS must be 1 if NTHREADS is 1 + if [ "$USER_NEVENTS_THREAD" != "" ]; then NEVENTS_THREAD=$USER_NEVENTS_THREAD; fi +else + echo -e "[$bmkDriver] function 'validateInputArguments' starting\n" + if ! validateInputArguments; then fail=-1; fi + echo -e "\n[$bmkDriver] function 'validateInputArguments' completed (status=$fail)\n" +fi + +# Set baseWDir and create it if necessary +if [ "$skipSubDir" == "1" ]; then + baseWDir=${resultsDir} + echo -e "[$bmkDriver] base working directory : $baseWDir\n" +else + baseWDir=${resultsDir}/$(basename $0 -bmk.sh)-c${NCOPIES}-e${NEVENTS_THREAD}-$(date +%s)_$(((RANDOM%9000)+1000)) + echo -e "[$bmkDriver] base working directory : $baseWDir\n" + if ! 
mkdir $baseWDir; then + echo "[$bmkDriver] ERROR! directory '${baseWDir}' cannot be created" + exit 1 # early termination (cannot start processing) + fi +fi +baseWDir=$(cd $baseWDir; pwd) + +# Dump all relevant variables after validating the input arguments +# Keep a copy on a separate log too for parser tests on previous logs +touch $baseWDir/inputs.log +for var in NCOPIES NTHREADS NEVENTS_THREAD; do + if [ "${!var}" == "" ] || ! [[ ${!var} =~ ^[0-9]+$ ]] || [ ! ${!var} -gt 0 ]; then + echo "[$bmkDriver] ERROR! Invalid value $var=${!var}" + exit 1; + fi + echo "Current value: $var=${!var}" + echo "$var=${!var}" >> $baseWDir/inputs.log +done +echo + +# Keep a copy of the version.json file for parser tests on previous logs +if [ -f $BMKDIR/version.json ]; then + cp $BMKDIR/version.json $baseWDir +fi + +# Define APP before doOne (BMK-152) and parseResults +APP=$(basename ${BMKDIR}) # or equivalently here $(basename $0 -bmk.sh) +echo -e "[$bmkDriver] APP=${APP}\n" + +# Wrapper for the doOne function +function doOneWrapper(){ + if [ "$1" == "" ] || [ "$2" != "" ]; then + echo -e "[$bmkDriver] ERROR! Invalid arguments '$@' to doOneWrapper" # internal error (inconsistent code) + return 1 # NB: return or exit are equivalent here because doOneWrapper is executed as a subprocess + fi + echo -e "\n[doOneWrapper ($1)] $(date) : process $1 started" + ###sleep 5 # this is not needed if the list of jobs is compiled from all '$!' + workDir=$(pwd)/proc_$1 # current directory is $baseWDir here + echo -e "[doOneWrapper ($1)] workdir is ${workDir}" + if ! mkdir -p $workDir || ! cd $workDir; then + echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create workdir)\n" + return 1 + fi + log=${workDir}/doOneWrapper_$1.log + echo -e "[doOneWrapper ($1)] logfile is $log" + if ! 
touch $log ; then + echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create logfile)\n" + return 1 + fi + echo -e "[doOneWrapper ($1)] $(date) : process $1 configured" 2>&1 | tee -a $log # configured means that log exists + mkdir $workDir/HOME + export HOME=$workDir/HOME # avoid writing to /root in read-only docker or to host HOME in singularity (BMK-166) + echo -e "[doOneWrapper ($1)] HOME=$HOME" 2>&1 | tee -a $log + cd -P /proc/self && basename $PWD | ( read thispid; \ + echo -e "[doOneWrapper ($1)] current process pid is $thispid" 2>&1 | tee -a $log ) # see https://stackoverflow.com/a/15170225 + cd - > /dev/null + local pid=$(cat $log | grep "current process pid is" | sed -e "s/.*current process pid is //") + local parsertest=0 # hardcoded: 0 => doOne (default); 1 => test the parser on old logs and bypass doOne (BMK-152) + if [ $parsertest -eq 0 ]; then + if [ "$(whoami)" == "root" ] && cat /proc/self/cgroup | cut -d/ -f2 | grep docker > /dev/null; then + echo -e "[doOneWrapper ($1)] inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log + export -f doOne + chown -R bmkuser:bmkuser $workDir 2>&1 | tee -a $log + su bmkuser -s /bin/bash -c "doOne $1" 2>&1 | tee -a $log + local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! + chown -R root:root $workDir 2>&1 | tee -a $log + else + echo -e "[doOneWrapper ($1)] not inside docker - run doOne as $(whoami)\n" 2>&1 | tee -a $log + doOne $1 2>&1 | tee -a $log + local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! + fi + else + cp -dpr $BMKDIR/jobs/refjob/proc_$1/* . + local status=$? 
+ \rm -f *${APP}*.json + echo -e "[doOneWrapper ($1)] DUMMY doOne: copy old logs for parser tests (BMK-152)" + fi + if [ "$status" == "0" ]; then + echo -e "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) completed ok\n" 2>&1 | tee -a $log + return 0 + else + echo -e "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) failed\n" 2>&1 | tee -a $log + return 1 + fi +} + +# Export variables to the doOne subprocesses +for var in NCOPIES NTHREADS NEVENTS_THREAD BMKDIR DEBUG APP; do + export $var +done + +# Spawn doOne subprocesses (unless validateInputArguments failed) +if [ $fail -eq 0 ]; then + + # Spawn subprocesses (and keep track of their list of them using '$!') + echo -e "------------------------------------------------------------------------" + echo -e "[$bmkDriver] spawn $NCOPIES processes" + echo -e "------------------------------------------------------------------------\n" + jobs="" + for i in $(seq 1 $NCOPIES); do + ( cd $baseWDir; doOneWrapper $i ) & + ipid=$! + [ $DEBUG -gt 0 ] && echo -e "[$bmkDriver] spawned process $i with pid $ipid" + jobs="$jobs $ipid" + sleep 0.1 # stagger job creation by 100ms + done + + # Wait for all subprocesses to complete and check their exit codes + # [NB: do not use 'jobs -p': some jobs may be missing if already completed] + [ $DEBUG -gt 0 ] && echo -e "\n[$bmkDriver] $(date) ... waiting for spawned processes with pid's$jobs\n" + wait $jobs > /dev/null 2>&1 + fail=0 # unnecessary but harmless (this code is only executed if $fail -eq 0) + for i in $(seq 1 $NCOPIES); do + if [ $(cat $baseWDir/proc_$i/doOneWrapper_$i.log | grep "[doOneWrapper ($i)]" | grep "completed ok" | wc -l) -ne 1 ]; then + let "fail+=1" + fi + done + echo -e "\n------------------------------------------------------------------------" + if [ $fail -gt 0 ]; then + echo "[$bmkDriver] ERROR! 
$fail processes failed (out of $NCOPIES)" + else + echo "[$bmkDriver] all $NCOPIES processes completed successfully" + fi + echo -e "------------------------------------------------------------------------\n" + +# Skip the doOne step if validateInputArguments failed +else + echo -e "[$bmkDriver] validateInputArguments failed: skip doOne processing" +fi + +# Parse results and generate summary using function parseResults +# - parseResults is started in the base working directoy +# - the number of failed jobs is passed to parseResults as input parameter +# - if a separate function generateSummary exists, it must be internally called by parseResults +# - the environment variable APP=<vo>-<workload> defines the name of the json file ${APP}_summary.json +cd $baseWDir +echo -e "[$bmkDriver] parse results and generate summary: starting" +echo -e "[$bmkDriver] current directory : $(pwd)\n" +parseResults $fail +parse=$? +echo -e "\n[$bmkDriver] parse results and generate summary: completed (status=$parse)" + +# Validate json files syntax (BMK-137) +cd $baseWDir +echo -e "\n[$bmkDriver] json file validation: starting" +json=0 +jsonFile=$baseWDir/${APP}_summary.json +jsonFile_new=$baseWDir/${APP}_summary_new.json +if [ ! -f ${jsonFile} ]; then + echo -e "[$bmkDriver] ERROR! json file '${jsonFile}' not found" + json=1 +else + echo "[$bmkDriver] lint json file '${jsonFile}' syntax using jq" + if ! jq '.' -c < ${jsonFile}; then + echo "[$bmkDriver] json file '${jsonFile}' lint validation failed" + json=1 + fi +fi +if [ -f ${jsonFile_new} ]; then + echo "[$bmkDriver] lint json file '${jsonFile_new}' syntax using jq" + if ! jq '.' 
-c < ${jsonFile_new}; then + echo "[$bmkDriver] json file '${jsonFile_new}' lint validation failed" + json=1 + fi +fi +echo -e "[$bmkDriver] json file validation: completed (status=$json)\n" + +# NB: This script is meant to be sourced, it does not return or exit at the end +if [ $parse -ne 0 ] || [ $fail -ne 0 ] || [ $json -ne 0 ]; then + bmkStatus=1 +else + bmkStatus=0 +fi +echo -e "[$bmkDriver] exiting back to $bmkScript" +echo -e "\n========================================================================" +echo -e "[$bmkDriver] $(date) exiting common benchmark driver (status=$bmkStatus)" +echo -e "========================================================================\n" +exit $bmkStatus diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh new file mode 100755 index 0000000..58ffa5b --- /dev/null +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -0,0 +1,56 @@ +#!/bin/env bash +# Wrapper script based on work from https://github.com/sciaba/patatrack-tests +# 2020.06 David Southwick <david.southwick@cern.ch> - include newer workflow for pre8 patatrack, singularity support + +#set -x # enable debug printouts + +#set -e # immediate exit on error + +# Function doOne must be defined in each benchmark +# Input argument $1: process index (between 1 and $NCOPIES) +# Return value: please return 0 if this workload copy was successful, 1 otherwise +# The following variables are guaranteed to be defined and exported: NCOPIES, NTHREADS, NEVENTS_THREAD, BMKDIR, DEBUG +# The function is started in process-specific working directory <basewdir>/proc_$1: +# please store here the individual log files for each of the NCOPIES processes +function doOne(){ + if [ "$1" == "" ] || [ "$2" != "" ]; then echo "[doOne] ERROR! 
Invalid arguments '$@' to doOne"; return 1; fi
  echo "[doOne ($1)] $(date) starting in $(pwd)"
  # Extra CMS-PATATRACK-specific setup
  export CMSSW_RELEASE=CMSSW_11_1_0_pre8_Patatrack
  export VO_CMS_SW_DIR=/cvmfs/cms.cern.ch
  export LC_ALL=en_US.UTF-8

  # Set up the CMSSW runtime environment from cvmfs
  source $VO_CMS_SW_DIR/cmsset_default.sh
  [[ ! -e ${CMSSW_RELEASE} ]] && scram project CMSSW ${CMSSW_RELEASE}
  pushd ${CMSSW_RELEASE}; eval `scramv1 runtime -sh`; popd

  # Execute the workload copy
  echo -e "\nRunning benchmark..."
  echo ${BMKDIR}
  # FIX: LOG was used below but never defined in this script (it was only set
  # in cms-reco-bmk.sh): define a per-process logfile in the process workdir.
  LOG=out_$1.log
  # FIX: dropped the stray '3>&1' (file descriptor 3 is never opened here).
  ${BMKDIR}/benchmark ${BMKDIR}/profile.py >>$LOG 2>&1
  status=${?}
  echo "[doOne ($1)] $(date) completed (status=$status)"
  # Return 0 if this workload copy was successful, 1 otherwise
  return $status
}


# Default values for NCOPIES, NTHREADS, NEVENTS_THREAD must be set in each benchmark
# FIX: NTHREADS was left empty, which makes bmk-driver.sh abort in
# check_mandatory_variables ("A default value of NTHREADS must be set");
# the old '-lt 1' guard could never fire because NCOPIES was hardcoded to 1.
# Default: one copy using all available cores (GPU workload).
NCOPIES=1
NTHREADS=`nproc`
NEVENTS_THREAD=10

# Source the common benchmark driver
if [ -f $(dirname $0)/bmk-driver.sh ]; then
  . $(dirname $0)/bmk-driver.sh
else
  . $(dirname $0)/../../../common/bmk-driver.sh
fi
diff --git a/cms/patatrack/cms-patatrack/cms-reco-bmk.sh b/cms/patatrack/cms-patatrack/cms-reco-bmk.sh
new file mode 100755
index 0000000..6a91e8f
--- /dev/null
+++ b/cms/patatrack/cms-patatrack/cms-reco-bmk.sh
#!/bin/bash

#set -x # enable debug printouts
#set -e # immediate exit on error

# Function doOne must be defined in each benchmark
# Input argument $1: process index (between 1 and $NCOPIES)
# Return value: please return 0 if this workload copy was successful, 1 otherwise
# The following variables are guaranteed to be defined and exported: NCOPIES, NTHREADS, NEVENTS_THREAD, BMKDIR, DEBUG
# The function is started in process-specific working directory <basewdir>/proc_$1:
# please store here the individual log files for each of the NCOPIES processes
function doOne(){
  if [ "$1" == "" ] || [ "$2" != "" ]; then echo "[doOne] ERROR! Invalid arguments '$@' to doOne"; return 1; fi
  echo "[doOne ($1)] $(date) starting in $(pwd)"
  # Extra CMS-RECO-specific setup
  export CMSSW_RELEASE=CMSSW_10_2_9
  export VO_CMS_SW_DIR=/cvmfs/cms.cern.ch
  source $VO_CMS_SW_DIR/cmsset_default.sh
  export SCRAM_ARCH=slc6_amd64_gcc700
  [[ ! -e ${CMSSW_RELEASE} ]] && scram project CMSSW ${CMSSW_RELEASE}
  pushd ${CMSSW_RELEASE}; eval `scramv1 runtime -sh`; popd
  # Configure WL copy
  ln -s ${BMKDIR}/data/GlobalTag.db ./GlobalTag.db
  ln -s ${BMKDIR}/data/*.root .
+ CMSSW_CONF=step3_RAW2DIGI_L1Reco_RECO_EI_PAT_DQM.py + JOB_EVENTS=$(( NEVENTS_THREAD * NTHREADS )) # bash shell arithmetic, may use var instead of $var + cp ${BMKDIR}/${CMSSW_CONF}_template ./${CMSSW_CONF} + sed -e "s@_NEVENTS_@${JOB_EVENTS}@g" -e "s@_NTHREADS_@$NTHREADS@g" -i ./${CMSSW_CONF} + # Execute WL copy + LOG=out_$1.log + cmsRun ./${CMSSW_CONF} >>$LOG 2>&1 3>&1 + status=${?} + echo "[doOne ($1)] $(date) completed (status=$status)" + # Return 0 if this workload copy was successful, 1 otherwise + return $status +} + +# Optional function validateInputArguments may be defined in each benchmark +# If it exists, it is expected to set NCOPIES, NTHREADS, NEVENTS_THREAD +# (based on previous defaults and on user inputs USER_NCOPIES, USER_NTHREADS, USER_NEVENTS_THREADS) +# Input arguments: none +# Return value: please return 0 if input arguments are valid, 1 otherwise +# The following variables are guaranteed to be defined: NCOPIES, NTHREADS, NEVENTS_THREAD +# (benchmark defaults) and USER_NCOPIES, USER_NTHREADS, USER_NEVENTS_THREADS (user inputs) +function validateInputArguments(){ + if [ "$1" != "" ]; then echo "[validateInputArguments] ERROR! 
Invalid arguments '$@' to validateInputArguments"; return 1; fi + echo "[validateInputArguments] validate input arguments" + # Number of copies and number of threads per copy + if [ "$USER_NTHREADS" != "" ] && [ "$USER_NCOPIES" != "" ]; then + NCOPIES=$USER_NCOPIES + NTHREADS=$USER_NTHREADS + elif [ "$USER_NTHREADS" != "" ]; then + NTHREADS=$USER_NTHREADS + NCOPIES=$((`nproc`/$NTHREADS)) + elif [ "$USER_NCOPIES" != "" ]; then + NCOPIES=$USER_NCOPIES + NTHREADS=$((`nproc`/$NCOPIES)) + fi + # Number of events per thread + if [ "$USER_NEVENTS_THREAD" != "" ]; then NEVENTS_THREAD=$USER_NEVENTS_THREAD; fi + # Return 0 if input arguments are valid, 1 otherwise + # Report any issues to parseResults via s_msg + export s_msg="ok" + tot_load=$(($NCOPIES*$NTHREADS)) + if [ $tot_load -gt `nproc` ]; then + s_msg="[ERROR] NCOPIES*NTHREADS=$NCOPIES*$NTHREADS=$tot_load > number of available cores (`nproc`)" + return 1 + elif [ $tot_load -eq 0 ]; then + s_msg="[ERROR] NCOPIES*NTHREADS=$NCOPIES*$NTHREADS=$tot_load. Please fix it" + return 1 + elif [ $tot_load -ne `nproc` ]; + then s_msg="[WARNING] NCOPIES*NTHREADS ($NCOPIES*$NTHREADS=$tot_load) != `nproc` (number of available cores nproc)" + echo $s_msg + fi + return 0 +} + +# Default values for NCOPIES, NTHREADS, NEVENTS_THREAD must be set in each benchmark +NTHREADS=4 +NCOPIES=$(( `nproc` / $NTHREADS )) +NEVENTS_THREAD=100 +if [ "$NCOPIES" -lt 1 ]; then # when $NTHREADS > nproc + NCOPIES=1 + NTHREADS=`nproc` +fi + +# Source the common benchmark driver +if [ -f $(dirname $0)/bmk-driver.sh ]; then + . $(dirname $0)/bmk-driver.sh +else + . 
$(dirname $0)/../../../common/bmk-driver.sh +fi diff --git a/cms/patatrack/cms-patatrack/cvmfs/.keepme b/cms/patatrack/cms-patatrack/cvmfs/.keepme new file mode 100644 index 0000000..e69de29 diff --git a/cms/patatrack/cms-patatrack/data/.keepme b/cms/patatrack/cms-patatrack/data/.keepme new file mode 100644 index 0000000..e69de29 diff --git a/cms/patatrack/cms-patatrack/parseResults.sh b/cms/patatrack/cms-patatrack/parseResults.sh new file mode 100644 index 0000000..2af853a --- /dev/null +++ b/cms/patatrack/cms-patatrack/parseResults.sh @@ -0,0 +1,67 @@ +function generateSummary(){ + echo -e "{\"copies\":$NCOPIES , \"threads_per_copy\":$NTHREADS , \"events_per_thread\" : $NEVENTS_THREAD , \"wl-scores\": $res_score, \"wl-stats\": {\"throughput_score\": $res_thr , \"CPU_score\": $res_cpu }, \"log\": \"${s_msg}\", \"app\": `cat $BMKDIR/version.json` }" > ${APP}_summary.json + cat ${APP}_summary.json +} + +# Function parseResults must be defined in each benchmark (or in a separate file parseResults.sh) +# [NB: if a separate function generateSummary exists, it must be internally called by parseResults] +# Input argument $1: status code <fail> from validateInputArguments and doOne steps: +# - <fail> < 0: validateInputArguments failed +# - <fail> > 0: doOne failed (<fail> processes failed out of $NCOPIES) +# - <fail> = 0: OK +# Return value: please return 0 if parsing was successful, 1 otherwise +# The following variables are guaranteed to be defined and exported: NCOPIES, NTHREADS, NEVENTS_THREAD, BMKDIR, DEBUG, APP +# The environment variable APP=<vo>-<workload> defines the name of the json file ${APP}_summary.json +# Logfiles have been stored in process-specific working directories <basewdir>/proc_<1...NCOPIES> +# The function is started in the base working directory <basewdir>: +# please store here the overall json summary file for all NCOPIES processes combined +function parseResults(){ + if [ "$1" == "" ] || [ "$2" != "" ]; then echo "[parseresults] ERROR! 
Invalid arguments '$@' to parseResults"; return 1; fi + echo "[parseResults] parse results and generate summary (previous status: $1)" + echo "[parseResults] current directory: $(pwd)" + export res_cpu='""' + export res_thr='""' + export res_score='""' + export s_msg="ok" + if [ "$1" -ne 0 ]; then + echo "Previous steps failed: skip parsing, go to generateSummary" + generateSummary # this has no return code + return 1 + else + #----------------------- + # Parse results + #----------------------- + echo "[parseResults] parsing results from" proc_*/out_*.log + # Documentation of cmssw time report at https://github.com/cms-sw/cmssw/blob/09c3fce6626f70fd04223e7dacebf0b485f73f54/FWCore/Services/plugins/Timing.cc#L240 + # Parsing Event Throughput: xxxx ev/s + res_thr=`grep -H "Event Throughput" proc_*/out_*.log | sed -e "s@[^:]*: Event Throughput: \([ 0-9\.]*\) ev/s@\1@" | awk 'BEGIN{amin=1000000;amax=0;count=0;} { val=$1; a[count]=val; count+=1; sum+=val; if(amax<val) amax=val; if(amin>val) amin=val} END{n = asort(a); if (n % 2) { median=a[(n + 1) / 2]; } else {median=(a[(n / 2)] + a[(n / 2) + 1]) / 2.0;}; +printf "{\"score\": %.4f, \"avg\": %.4f, \"median\": %.4f, \"min\": %.4f, \"max\": %.4f}", sum, sum/count, median, amin, amax +}' nevt=$NEVENTS_THREAD nthread=$NTHREADS || (echo "{}"; return 1)` + STATUS_1=$? + + #Duplicating above parsing, as quick and dirty. 
SHoudl be replaced by a python parser + res_score=`grep -H "Event Throughput" proc_*/out_*.log | sed -e "s@[^:]*: Event Throughput: \([ 0-9\.]*\) ev/s@\1@" | awk 'BEGIN{amin=1000000;amax=0;count=0;} { val=$1; a[count]=val; count+=1; sum+=val; if(amax<val) amax=val; if(amin>val) amin=val} END{n = asort(a); if (n % 2) { median=a[(n + 1) / 2]; } else {median=(a[(n / 2)] + a[(n / 2) + 1]) / 2.0;}; +printf "{\"reco\": %.4f}", sum +}' nevt=$NEVENTS_THREAD nthread=$NTHREADS || (echo "{}"; return 1)` + + # Parsing CPU Summary: \n- Total loop:: xxxx seconds of all CPUs + res_cpu=`grep -H -A2 "CPU Summary" proc_*/out_*.log | grep "Total loop" | sed -e "s@.*\sTotal loop: \([ 0-9\.]*\)@\1@" | awk 'BEGIN{amin=1000000;amax=0;count=0;} { val=nevt*nthread/$1; a[count]=val; count+=1; sum+=val; if(amax<val) amax=val; if(amin>val) amin=val} END{n = asort(a); if (n % 2) {median=a[(n + 1) / 2]; } else {median=(a[(n / 2)] + a[(n / 2) + 1]) / 2.0;}; +printf "{\"score\": %.4f, \"avg\": %.4f, \"median\": %.4f, \"min\": %.4f, \"max\": %.4f}", sum, sum/count, median, amin, amax +}' nevt=$NEVENTS_THREAD nthread=$NTHREADS || (echo "{}"; return 1)` + STATUS_2=$? + [[ "$STATUS_1" == "0" ]] && [[ "$STATUS_2" == "0" ]] + STATUS=$? 
+ [[ "$STATUS" != "0" ]] && export s_msg="ERROR" + echo "[parseResults] parsing completed (status=$STATUS)" + #----------------------- + # Generate summary + #----------------------- + echo "[parseResults] generate summary" + generateSummary # this has no return code + #----------------------- + # Return status + #----------------------- + # Return 0 if result parsing and json generation were successful, 1 otherwise + return $STATUS + fi +} diff --git a/cms/patatrack/cms-patatrack/prepare-patch.sh b/cms/patatrack/cms-patatrack/prepare-patch.sh new file mode 100755 index 0000000..8a45177 --- /dev/null +++ b/cms/patatrack/cms-patatrack/prepare-patch.sh @@ -0,0 +1,26 @@ +#!/bin/env bash + +install_dir="/tmp/install" +echo -e "\nCloning Patatrack repos into ${install_dir}..." +# Clone software repos +git clone https://github.com/cms-patatrack/patatrack-scripts ${install_dir}/patatrack-scripts +git clone https://github.com/sciaba/patatrack-tests ${install_dir}/patatrack-tests + +cd $install_dir + +echo -e "\nSet up Patatrack Scripts..." +# Prepare scripts +cp ${install_dir}/patatrack-tests/*/*.patch \ + ${install_dir}/patatrack-tests/config/sourceFromPixelRaw_cff.py \ + ${install_dir}/patatrack-scripts/ + +cd ${install_dir}/patatrack-scripts/ +patch -b --forward workflow.sh workflow.patch + +./workflow.sh +patch -b --forward profile.py profile.patch +#change .../patatrack-scripts/sourceFromPixelRaw_cff.py and point "fed_prefix" to .../extraction/path/.../store/opendata/cms +sed -i "s|/data/store/opendata/cms|${install_dir}/store/opendata/cms|g" sourceFromPixelRaw_cff.py + +[ ! 
-d /bmk/cms-patatrack ] && mkdir -p /bmk/cms-patatrack +cp -r ${install_dir}/patatrack-scripts /bmk/cms-patatrack \ No newline at end of file diff --git a/cms/patatrack/cms-patatrack/test_parser.sh b/cms/patatrack/cms-patatrack/test_parser.sh new file mode 100755 index 0000000..4778af6 --- /dev/null +++ b/cms/patatrack/cms-patatrack/test_parser.sh @@ -0,0 +1,2 @@ +#!/bin/bash +$(dirname $0)/../../../common/parsertest.sh $(dirname $0) -- GitLab From 95b3297fb127ef846891f725aad02dbc3c81842d Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 21:32:02 +0200 Subject: [PATCH 02/74] patatrack CI --- .gitlab-ci.yml | 22 ++++++++++++- cms/patatrack/cms-patatrack-ci.yml | 50 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 cms/patatrack/cms-patatrack-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5aeca70..22a0bcc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,11 @@ stages: - test -- announce-promoted-image - triggers +##################################################### +### ATLAS KV (a test of cvmfs functionality) +##################################################### + job_test_kv: stage: test image: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa @@ -33,6 +36,23 @@ job_test_kv: expire_in: 1 week when: always +##################################################### +### CMS PATATRACK +##################################################### + +patatrack: + stage: triggers + trigger: + include: cms/patatrack/cms-patatrack-ci.yml + strategy: depend + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + +##################################################### +### LHC Simple Track +##################################################### simpletrack: stage: triggers diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml new file mode 100644 index 0000000..90188c5 --- /dev/null +++ 
b/cms/patatrack/cms-patatrack-ci.yml @@ -0,0 +1,50 @@ +--- +stages: +- build +- snapshot +- rebuild +- test + +########################## +## Templates ############ + +.definition_build_image: &template_build_image + image: # NB enable shared runners and do not specify a CI tag + name: gitlab-registry.cern.ch/ci-tools/docker-image-builder # CERN version of the Kaniko image + entrypoint: [""] + script: + - echo "current commit is ${CI_COMMIT_SHA:0:8}" + - echo "current branch is ${CI_COMMIT_BRANCH}" + - echo "current tag is ${CI_COMMIT_TAG}" + - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi + - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi + - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi + - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi + - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" + - echo "DESTINATIONS $DESTINATIONS" + # Prepare Kaniko configuration file + - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json + # Build and push the image from the Dockerfile at the root of the project. + # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME + # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables + - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS + + +########################################################### +# docker in docker image: to trigger other docker runs +########################################################### + +job_build_interim_image: + stage: build + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/Dockerfile.nvidia + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms-patatrack + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/* + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/*/*- docker-images/cvmfs/* + -- GitLab From afad8af54eddc23213e79005a7e7856f13a0123f Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 21:33:40 +0200 Subject: [PATCH 03/74] patatrack CI --- cms/patatrack/cms-patatrack-ci.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 90188c5..6122584 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -1,12 +1,12 @@ --- stages: - build -- snapshot -- rebuild -- test +#- snapshot +#- rebuild +#- test ########################## -## Templates ############ +## Templates ############# .definition_build_image: &template_build_image image: # NB enable shared runners and do not specify a CI tag @@ -39,12 +39,13 @@ job_build_interim_image: before_script: - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/Dockerfile.nvidia - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - - export IMAGE_NAME=cms-patatrack + - export IMAGE_NAME=cms-patatrack-interim - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} <<: 
*template_build_image only: changes: - cms/patatrack/* - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/*/*- docker-images/cvmfs/* + - cms/patatrack/cms-patatrack/*/* + - docker-images/cvmfs/* -- GitLab From 3f6c1abeb5924a16cf072ec362e5227e533579bd Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 21:37:52 +0200 Subject: [PATCH 04/74] reorder CI --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 22a0bcc..1a72409 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,6 @@ stages: -- test - triggers +- test ##################################################### ### ATLAS KV (a test of cvmfs functionality) -- GitLab From 9f958919442a6af907b9b7d6fff01c5b6263fec0 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 21:43:01 +0200 Subject: [PATCH 05/74] add tag CI --- cms/patatrack/cms-patatrack-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 6122584..1f658df 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -36,6 +36,8 @@ stages: job_build_interim_image: stage: build + tags: + - hep-workload-gpu-docker-builder before_script: - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/Dockerfile.nvidia - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack -- GitLab From 5a4b6b854af4b9f192fcf201912c2fdcc5ca6319 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 22:56:25 +0200 Subject: [PATCH 06/74] splitting build steps, because long --- cms/patatrack/cms-patatrack-ci.yml | 48 +++++++++++++++++++++++------ cms/patatrack/nvidia.Dockerfile.0 | 21 +++++++++++++ cms/patatrack/nvidia.Dockerfile.1 | 17 +++++++++++ cms/patatrack/nvidia.Dockerfile.2 | 49 ++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 9 deletions(-) 
create mode 100644 cms/patatrack/nvidia.Dockerfile.0 create mode 100644 cms/patatrack/nvidia.Dockerfile.1 create mode 100644 cms/patatrack/nvidia.Dockerfile.2 diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 1f658df..63bd36b 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -1,6 +1,8 @@ --- stages: -- build +- build_0 +- build_1 +- build_2 #- snapshot #- rebuild #- test @@ -34,20 +36,48 @@ stages: # docker in docker image: to trigger other docker runs ########################################################### -job_build_interim_image: - stage: build +job_build_image_step0: + stage: build_0 tags: - hep-workload-gpu-docker-builder before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/Dockerfile.nvidia + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.0 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - - export IMAGE_NAME=cms-patatrack-interim + - export IMAGE_NAME=cms/cms-patatrack-nvidia-0 - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} <<: *template_build_image only: changes: - - cms/patatrack/* - - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/*/* - - docker-images/cvmfs/* + - cms/patatrack/nvidia.Dockerfile.0 + +job_build_image_step1: + stage: build_1 + tags: + - hep-workload-gpu-docker-builder + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.1 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-1 + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/nvidia.Dockerfile.0 + - cms/patatrack/nvidia.Dockerfile.1 + +job_build_image_step2: + stage: build_2 + tags: + - hep-workload-gpu-docker-builder + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.2 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-2 + 
- export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/* + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/*/* diff --git a/cms/patatrack/nvidia.Dockerfile.0 b/cms/patatrack/nvidia.Dockerfile.0 new file mode 100644 index 0000000..02b2976 --- /dev/null +++ b/cms/patatrack/nvidia.Dockerfile.0 @@ -0,0 +1,21 @@ +FROM nvidia/cuda:10.1-devel-centos7 + +RUN yum install -y \ + which \ + man \ + file \ + util-linux \ + gcc \ + wget \ + tar freetype \ + perl perl-Data-Dumper \ + patch git vim; yum clean all + + +RUN yum --enablerepo=extras install epel-release -y + +RUN yum install -y python2-pip + +RUN pip install --upgrade pip +RUN pip install numpy scipy + diff --git a/cms/patatrack/nvidia.Dockerfile.1 b/cms/patatrack/nvidia.Dockerfile.1 new file mode 100644 index 0000000..aec2d54 --- /dev/null +++ b/cms/patatrack/nvidia.Dockerfile.1 @@ -0,0 +1,17 @@ +FROM gitlab-registry.cern.ch/hep-benchmarks-gpu/cms/cms-patatrack-nvidia-0:qa + + +# Prepare a data directory for downloading large files that should normally be cacheable (BMK-159) +# Its contents should be retrieved in Dockerfile.append, before /bmk/<bmkdir> is copied over +# Each file it contains is then individually symlinked to /bmk/<bmkdir>/data/<file> in Dockerfile.template +RUN mkdir -p /bmk/data + + +# Add here any workload-specific Dockerfile instructions. +# They will be appended to the Dockerfile generated from a common template. 
+ + +RUN echo -e "\nExtracting Patatrack dataset..."; \ + wget -q https://hep-benchmarks.web.cern.ch/hep-benchmarks/hep-workloads/data/cms/patatrack/opendata.tar -O /bmk/data/opendata.tar; \ + cd /bmk/data/; tar -xvf ./opendata.tar + diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 new file mode 100644 index 0000000..e36535b --- /dev/null +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -0,0 +1,49 @@ +FROM gitlab-registry.cern.ch/hep-benchmarks-gpu/cms/cms-patatrack-nvidia-1:qa + +# ********* DOCKERFILE TEMPLATE start ********* +# ******* PLEASE DO NOT EDIT THIS FILE! ******* +# This is the common template for all HEP workloads (BMK-124 and BMK-159). +# Please add workload-specific instructions in Dockerfile.append. + +# Optionally allow disabling the cache only from this point onwards if using +# docker build -t your-image --build-arg CACHEBUST=$(date +%s) . +# See https://github.com/moby/moby/issues/1996#issuecomment-185872769 + +###ARG CACHEBUST=1 + +###RUN echo CACHEBUST=$CACHEBUST + +# This should normally contain always the same files and be cacheable (BMK-159) +COPY ./cvmfs /cvmfs + +RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar + +# This should normally contain always the same files and be cacheable (BMK-159) +RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar + +# This may also be cacheable in most cases except when /bmk contents change +COPY ./cms-patatrack /bmk/./cms-patatrack + +RUN ./cms-patatrack/prepare-patch.sh + +#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh + +RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi + +RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done + +RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar + +RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json + +ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] + +# This contains provenance data that can never be cached +COPY ./cvmfs.provenance /cvmfs.provenance + +# Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) +#RUN groupadd bmkuser + +#RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser + +# ********* DOCKERFILE TEMPLATE end ********* -- GitLab From d845d0b7b79cfc44916d67f4a072d65773c68225 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 22:57:30 +0200 Subject: [PATCH 07/74] move cvmfs dir --- cms/patatrack/{cms-patatrack => }/cvmfs/.keepme | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cms/patatrack/{cms-patatrack => }/cvmfs/.keepme (100%) diff --git a/cms/patatrack/cms-patatrack/cvmfs/.keepme b/cms/patatrack/cvmfs/.keepme similarity index 100% rename from cms/patatrack/cms-patatrack/cvmfs/.keepme rename to cms/patatrack/cvmfs/.keepme -- GitLab From 1a54bfbc75e19b2c83c8788f982d7c9d8cb9ee31 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 23:00:05 +0200 Subject: [PATCH 08/74] move cvmfs dir --- cms/patatrack/nvidia.Dockerfile.0 | 2 -- 1 file changed, 2 
deletions(-) diff --git a/cms/patatrack/nvidia.Dockerfile.0 b/cms/patatrack/nvidia.Dockerfile.0 index 02b2976..6147005 100644 --- a/cms/patatrack/nvidia.Dockerfile.0 +++ b/cms/patatrack/nvidia.Dockerfile.0 @@ -13,9 +13,7 @@ RUN yum install -y \ RUN yum --enablerepo=extras install epel-release -y - RUN yum install -y python2-pip - RUN pip install --upgrade pip RUN pip install numpy scipy -- GitLab From 869a3b8d2b6b90073f1da6c1a2ee0bf12c393070 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Tue, 30 Jun 2020 23:26:33 +0200 Subject: [PATCH 09/74] fix image path --- cms/patatrack/nvidia.Dockerfile.1 | 2 +- cms/patatrack/nvidia.Dockerfile.2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/patatrack/nvidia.Dockerfile.1 b/cms/patatrack/nvidia.Dockerfile.1 index aec2d54..9473763 100644 --- a/cms/patatrack/nvidia.Dockerfile.1 +++ b/cms/patatrack/nvidia.Dockerfile.1 @@ -1,4 +1,4 @@ -FROM gitlab-registry.cern.ch/hep-benchmarks-gpu/cms/cms-patatrack-nvidia-0:qa +FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-0:qa # Prepare a data directory for downloading large files that should normally be cacheable (BMK-159) diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index e36535b..df2e276 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -1,4 +1,4 @@ -FROM gitlab-registry.cern.ch/hep-benchmarks-gpu/cms/cms-patatrack-nvidia-1:qa +FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-1:qa # ********* DOCKERFILE TEMPLATE start ********* # ******* PLEASE DO NOT EDIT THIS FILE! 
******* -- GitLab From c80a3008f8c43623cdf3cfe5be9741e90675a088 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 00:44:33 +0200 Subject: [PATCH 10/74] fix patch path --- cms/patatrack/nvidia.Dockerfile.2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index df2e276..f1f21eb 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -19,12 +19,12 @@ COPY ./cvmfs /cvmfs RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar # This should normally contain always the same files and be cacheable (BMK-159) -RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar +#RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar # This may also be cacheable in most cases except when /bmk contents change COPY ./cms-patatrack /bmk/./cms-patatrack -RUN ./cms-patatrack/prepare-patch.sh +RUN /bmk/./cms-patatrack/prepare-patch.sh #COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh @@ -32,7 +32,7 @@ RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done -RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar +#RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json -- GitLab From a504b584155f30e862ffe4d847e5ad61f7e3bd5c Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 01:11:43 +0200 Subject: [PATCH 11/74] fix patch path --- cms/patatrack/nvidia.Dockerfile.2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index f1f21eb..588daa8 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -34,7 +34,7 @@ RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatr #RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar -RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on 
CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json +#RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] -- GitLab From 7547416d98289da67469589261649dda988c4b8b Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 01:21:07 +0200 Subject: [PATCH 12/74] fix patch script --- cms/patatrack/cms-patatrack/prepare-patch.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cms/patatrack/cms-patatrack/prepare-patch.sh b/cms/patatrack/cms-patatrack/prepare-patch.sh index 8a45177..c619ac0 100755 --- a/cms/patatrack/cms-patatrack/prepare-patch.sh +++ b/cms/patatrack/cms-patatrack/prepare-patch.sh @@ -2,12 +2,15 @@ install_dir="/tmp/install" echo -e "\nCloning Patatrack repos into ${install_dir}..." -# Clone software repos -git clone https://github.com/cms-patatrack/patatrack-scripts ${install_dir}/patatrack-scripts -git clone https://github.com/sciaba/patatrack-tests ${install_dir}/patatrack-tests + +ls -l ${install_dir} cd $install_dir +# Clone software repos +git clone https://github.com/cms-patatrack/patatrack-scripts +git clone https://github.com/sciaba/patatrack-tests + echo -e "\nSet up Patatrack Scripts..." 
# Prepare scripts cp ${install_dir}/patatrack-tests/*/*.patch \ @@ -17,6 +20,8 @@ cp ${install_dir}/patatrack-tests/*/*.patch \ cd ${install_dir}/patatrack-scripts/ patch -b --forward workflow.sh workflow.patch +ls -l + ./workflow.sh patch -b --forward profile.py profile.patch #change .../patatrack-scripts/sourceFromPixelRaw_cff.py and point "fed_prefix" to .../extraction/path/.../store/opendata/cms -- GitLab From 6fe2f8cd8108bd3aedb35b9986dc4212d83cb41f Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 01:36:25 +0200 Subject: [PATCH 13/74] generation of config at runtime --- cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh | 9 ++++++++- cms/patatrack/cms-patatrack/prepare-patch.sh | 5 ----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index 58ffa5b..64efa42 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -31,7 +31,14 @@ function doOne(){ pwd ls - ${BMKDIR}/benchmark ${BMKDIR}/profile.py >>$LOG 2>&1 3>&1 + cd ${BMKDIR}/patatrack-scripts + ./workflow.sh + patch -b --forward profile.py profile.patch + #change .../patatrack-scripts/sourceFromPixelRaw_cff.py and point "fed_prefix" to .../extraction/path/.../store/opendata/cms + sed -i "s|/data/store/opendata/cms|${install_dir}/store/opendata/cms|g" sourceFromPixelRaw_cff.py + + + ./benchmark profile.py >>$LOG 2>&1 3>&1 status=${?} echo "[doOne ($1)] $(date) completed (status=$status)" # Return 0 if this workload copy was successful, 1 otherwise diff --git a/cms/patatrack/cms-patatrack/prepare-patch.sh b/cms/patatrack/cms-patatrack/prepare-patch.sh index c619ac0..d18daec 100755 --- a/cms/patatrack/cms-patatrack/prepare-patch.sh +++ b/cms/patatrack/cms-patatrack/prepare-patch.sh @@ -22,10 +22,5 @@ patch -b --forward workflow.sh workflow.patch ls -l -./workflow.sh -patch -b --forward 
profile.py profile.patch -#change .../patatrack-scripts/sourceFromPixelRaw_cff.py and point "fed_prefix" to .../extraction/path/.../store/opendata/cms -sed -i "s|/data/store/opendata/cms|${install_dir}/store/opendata/cms|g" sourceFromPixelRaw_cff.py - [ ! -d /bmk/cms-patatrack ] && mkdir -p /bmk/cms-patatrack cp -r ${install_dir}/patatrack-scripts /bmk/cms-patatrack \ No newline at end of file -- GitLab From 45b6c7ec6dbe661478a11bf20acaf3069dbc2185 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 09:29:23 +0200 Subject: [PATCH 14/74] removed single dockerfile. Split steps --- cms/patatrack/Dockerfile.nvidia | 83 --------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 cms/patatrack/Dockerfile.nvidia diff --git a/cms/patatrack/Dockerfile.nvidia b/cms/patatrack/Dockerfile.nvidia deleted file mode 100644 index 66c4928..0000000 --- a/cms/patatrack/Dockerfile.nvidia +++ /dev/null @@ -1,83 +0,0 @@ -FROM nvidia/cuda:10.1-devel-centos7 - -RUN yum install -y \ - which \ - man \ - file \ - util-linux \ - gcc \ - wget \ - tar freetype \ - perl perl-Data-Dumper \ - patch git vim; yum clean all - - -RUN yum --enablerepo=extras install epel-release -y - -RUN yum install -y python2-pip - -RUN pip install --upgrade pip -RUN pip install numpy scipy - - -# Prepare a data directory for downloading large files that should normally be cacheable (BMK-159) -# Its contents should be retrieved in Dockerfile.append, before /bmk/<bmkdir> is copied over -# Each file it contains is then individually symlinked to /bmk/<bmkdir>/data/<file> in Dockerfile.template -RUN mkdir -p /bmk/data - - -# Add here any workload-specific Dockerfile instructions. -# They will be appended to the Dockerfile generated from a common template. 
- - -RUN echo -e "\nExtracting Patatrack dataset..."; \ - wget -q https://hep-benchmarks.web.cern.ch/hep-benchmarks/hep-workloads/data/cms/patatrack/opendata.tar -O /bmk/data/opendata.tar; \ - cd /bmk/data/; tar -xvf ./opendata.tar - -# ********* DOCKERFILE TEMPLATE start ********* -# ******* PLEASE DO NOT EDIT THIS FILE! ******* -# This is the common template for all HEP workloads (BMK-124 and BMK-159). -# Please add workload-specific instructions in Dockerfile.append. - -# Optionally allow disabling the cache only from this point onwards if using -# docker build -t your-image --build-arg CACHEBUST=$(date +%s) . -# See https://github.com/moby/moby/issues/1996#issuecomment-185872769 - -###ARG CACHEBUST=1 - -###RUN echo CACHEBUST=$CACHEBUST - -# This should normally contain always the same files and be cacheable (BMK-159) -COPY ./cvmfs /cvmfs - -RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar - -# This should normally contain always the same files and be cacheable (BMK-159) -RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar - -# This may also be cacheable in most cases except when /bmk contents change -COPY ./cms-patatrack /bmk/./cms-patatrack - -RUN ./cms-patatrack/prepare-patch.sh - -#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh - -RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi - -RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done - -RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar - -RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json - -ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] - -# This contains provenance data that can never be cached -COPY ./cvmfs.provenance /cvmfs.provenance - -# Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) -#RUN groupadd bmkuser - -#RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser - -# ********* DOCKERFILE TEMPLATE end ********* -- GitLab From de5dd63462f7b4879701cef76c78aa102d5819df Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 09:31:48 +0200 Subject: [PATCH 15/74] comment build steps from automatic procedure --- cms/patatrack/nvidia.Dockerfile.2 | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index 588daa8..198b1c0 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -19,31 +19,30 @@ COPY ./cvmfs /cvmfs RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar # This should normally contain always the same files and be cacheable (BMK-159) -#RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && 
md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar +#RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar #FIXME # This may also be cacheable in most cases except when /bmk contents change COPY ./cms-patatrack /bmk/./cms-patatrack RUN /bmk/./cms-patatrack/prepare-patch.sh -#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh +#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh # FIXME RUN if [ ! -d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done -#RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar +#RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME -#RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json +#RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json #FIXME ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] # This contains 
provenance data that can never be cached -COPY ./cvmfs.provenance /cvmfs.provenance +#COPY ./cvmfs.provenance /cvmfs.provenance #FIXME # Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) -#RUN groupadd bmkuser - -#RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser +#RUN groupadd bmkuser #FIXME +#RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser #FIXME # ********* DOCKERFILE TEMPLATE end ********* -- GitLab From c36562859bb54a8a22d774d515ee1b98e52478bc Mon Sep 17 00:00:00 2001 From: olga <odatskov@cern.ch> Date: Wed, 1 Jul 2020 11:53:36 +0200 Subject: [PATCH 16/74] Switching to tmp for output --- .gitlab-ci.yml | 3 ++- lhc/simpletrack/CHANGELOG.md | 34 ++++++++++++------------------ lhc/simpletrack/README.md | 16 +++++++------- lhc/simpletrack/lhc-simpletrack.sh | 2 +- 4 files changed, 25 insertions(+), 30 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1a72409..5c36c48 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -61,7 +61,8 @@ simpletrack: strategy: depend only: variables: - - $CI_COMMIT_BRANCH == "qa" + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ changes: - lhc/simpletrack/Dockerfile.* - lhc/simpletrack/lhc-simpletrack.* diff --git a/lhc/simpletrack/CHANGELOG.md b/lhc/simpletrack/CHANGELOG.md index 15f9636..5f76ff0 100644 --- a/lhc/simpletrack/CHANGELOG.md +++ b/lhc/simpletrack/CHANGELOG.md @@ -1,32 +1,26 @@ -# 0.3.0 (June 28th 2020) +# QA + +FIXES: +* For unpriviledged Singularity runs switched to /tmp/jobs as output folder. + +# Master UPDATES: * intel: NEO version updated to 20.25.17111 and oneAPI DPC++ to 2020.10.6.0.4 (June releases). * ROCm container added +CHANGES: +* Tagged build based on the spec definition. +* Using trigger-based build to rebuild only on simpletrack changes. +* CI/CD basic functionality test added for the CPU-based container builds i.e. intel and pocl + FEATURES: * Switched to argument for benchmark setup instead of environment variables. 
- -# 0.2.1 (June 25th 2020) +* Added "benchmark" mode to run and generate json output for the runs. +* Generate yaml alongside the json summary. +* Standalone execution of the simpletrack benchmark without an orchestrator. FIXES: * ocl-icd-dev package explicitely installed now to avoid build failures. - -# 0.2.0 (June 16th 2020) - -FIXES: * Using simpletrack device lists instead of clinfo. -FEATURES: -* Added "benchmark" mode to run and generate json output for the runs. -* Generate yaml alongside the json summary. - -# 0.1.0 (June 13th 2020) - -FEATURES: -* Standalone execution of the simpletrack benchmark without an orchestrator. - -CHANGES: -* Tagged build based on the spec definition. -* Using trigger-based build to rebuild only on simpletrack changes. -* CI/CD basic functionality test added for the CPU-based container builds i.e. intel and pocl diff --git a/lhc/simpletrack/README.md b/lhc/simpletrack/README.md index a05d7ab..bfbb3e9 100644 --- a/lhc/simpletrack/README.md +++ b/lhc/simpletrack/README.md @@ -10,7 +10,7 @@ Docker images containing OpenCL-oriented Simpletrack benchmark built for a selec | | __intel__ | __rocm__ | __nvidia__ | __pocl__ | |--------------|:-----------:|:-----------:|:--------:|:----------:| | __GPU__ | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | -| __CPU__ | :heavy_check_mark: | :grey_question: | | :heavy_check_mark: | +| __CPU__ | :heavy_check_mark: | | | :heavy_check_mark: | # Usage @@ -33,15 +33,15 @@ Options: - Use the benchmark option "all" to execute runs on all available devices: ```~$ docker run --rm <image:tag> -b "all"``` - To discover available platforms use the show option: ```~$ docker run --rm <image:tag> -s``` -The benchmark mode allows to generate logs and output files in a default location (/simpletrack/examples/lhc/jobs or $CI_PROJECT_DIR) for either a single or all available devices. 
+The benchmark mode allows to generate logs and output files in a default location (/tmp/jobs or $CI_PROJECT_DIR) for either a single or all available devices. -## Docker GPU Passthrough +## GPU Passthrough To passthrough the device to the container, use the following options: -| Target | Passthrough option | -|:------------|:-------------------| -| __Nvidia__ | ```--gpus all``` | -| __AMD__ | ```--device /dev/kfd --device /dev/dri``` | -| __Intel__ | ```--device /dev/dri``` | +| Target | Docker | Singularity | +|:------------|:-------------------|:------------| +| __Nvidia__ | ```--gpus all``` | ```--nv``` | +| __AMD__ | ```--device /dev/kfd --device /dev/dri``` | ```--rocm``` | +| __Intel__ | ```--device /dev/dri``` | | diff --git a/lhc/simpletrack/lhc-simpletrack.sh b/lhc/simpletrack/lhc-simpletrack.sh index 8bf0ca1..5ff7776 100755 --- a/lhc/simpletrack/lhc-simpletrack.sh +++ b/lhc/simpletrack/lhc-simpletrack.sh @@ -73,7 +73,7 @@ get_json() { ################################### ####### Main ###################### -if [ ! "$CI_PROJECT_DIR" == "" ]; then WORK_DIR=$CI_PROJECT_DIR/jobs; else WORK_DIR=`pwd`/jobs; fi +if [ ! "$CI_PROJECT_DIR" == "" ]; then WORK_DIR=$CI_PROJECT_DIR/jobs; else WORK_DIR="/tmp/jobs"; fi if [ ! 
-d $WORK_DIR ]; then mkdir -p $WORK_DIR; fi if [ -f $WORK_DIR/out.log ]; then rm $WORK_DIR/out.log; fi -- GitLab From 0da002ab375a63feebc372868f600b5a0c128c7e Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 12:26:46 +0200 Subject: [PATCH 17/74] add bmk user --- .gitlab-ci.yml | 3 ++- cms/patatrack/nvidia.Dockerfile.2 | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1a72409..8e74547 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -43,7 +43,8 @@ job_test_kv: patatrack: stage: triggers trigger: - include: cms/patatrack/cms-patatrack-ci.yml + include: + - local: cms/patatrack/cms-patatrack-ci.yml strategy: depend only: variables: diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index 198b1c0..25b13e7 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -42,7 +42,7 @@ ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] #COPY ./cvmfs.provenance /cvmfs.provenance #FIXME # Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) -#RUN groupadd bmkuser #FIXME +RUN groupadd bmkuser -#RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser #FIXME +RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser # ********* DOCKERFILE TEMPLATE end ********* -- GitLab From 90e7f923d827a44d8ed07d75714e584112689fa6 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 14:42:45 +0200 Subject: [PATCH 18/74] adding jq --- cms/patatrack/nvidia.Dockerfile.0 | 1 + cms/patatrack/nvidia.Dockerfile.1 | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cms/patatrack/nvidia.Dockerfile.0 b/cms/patatrack/nvidia.Dockerfile.0 index 6147005..ea44e6a 100644 --- a/cms/patatrack/nvidia.Dockerfile.0 +++ b/cms/patatrack/nvidia.Dockerfile.0 @@ -5,6 +5,7 @@ RUN yum install -y \ man \ file \ util-linux \ + jq \ gcc \ wget \ tar 
freetype \ diff --git a/cms/patatrack/nvidia.Dockerfile.1 b/cms/patatrack/nvidia.Dockerfile.1 index 9473763..6ee5e1a 100644 --- a/cms/patatrack/nvidia.Dockerfile.1 +++ b/cms/patatrack/nvidia.Dockerfile.1 @@ -13,5 +13,5 @@ RUN mkdir -p /bmk/data RUN echo -e "\nExtracting Patatrack dataset..."; \ wget -q https://hep-benchmarks.web.cern.ch/hep-benchmarks/hep-workloads/data/cms/patatrack/opendata.tar -O /bmk/data/opendata.tar; \ - cd /bmk/data/; tar -xvf ./opendata.tar + cd /bmk/data/; tar -xvf ./opendata.tar; rm ./opendata.tar -- GitLab From cb78f72231c9deb15801829488eef03fdb7403a3 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 15:11:43 +0200 Subject: [PATCH 19/74] running sequence --- .../cms-patatrack/cms-patatrack-bmk.sh | 63 ++++++++++++++----- cms/patatrack/cms-patatrack/prepare-patch.sh | 4 ++ 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index 64efa42..cc55d92 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -6,6 +6,10 @@ #set -e # immediate exit on error +function myecho(){ + echo -e "[${FUNCNAME[1]}] $@" +} + # Function doOne must be defined in each benchmark # Input argument $1: process index (between 1 and $NCOPIES) # Return value: please return 0 if this workload copy was successful, 1 otherwise @@ -13,9 +17,40 @@ # The function is started in process-specific working directory <basewdir>/proc_$1: # please store here the individual log files for each of the NCOPIES processes function doOne(){ - if [ "$1" == "" ] || [ "$2" != "" ]; then echo "[doOne] ERROR! Invalid arguments '$@' to doOne"; return 1; fi - echo "[doOne ($1)] $(date) starting in $(pwd)" + if [ "$1" == "" ] || [ "$2" != "" ]; then myecho "ERROR! 
Invalid arguments '$@' to doOne"; return 1; fi + myecho "($1) $(date) starting in $(pwd)" # Extra CMS-PATATRACK-specific setup + + ####################################### + # This needs to be fixed + ln -s ${BMKDIR}/patatrack-scripts + ls -l + cd patatrack-scripts + myecho "current dir is `pwd`" + myecho "files in `pwd` are" + ls -l + ./benchmark profile.py #>>$LOG 2>&1 3>&1 + ####################################### + + status=${?} + myecho "($1) $(date) completed (status=$status)" + # Return 0 if this workload copy was successful, 1 otherwise + return $status +} + +# FIXME +# Using validateInputArguments for another purpose +# It woudl be useful to have a preparation function called by the driver + +# Optional function validateInputArguments may be defined in each benchmark +# If it exists, it is expected to set NCOPIES, NTHREADS, NEVENTS_THREAD +# (based on previous defaults and on user inputs USER_NCOPIES, USER_NTHREADS, USER_NEVENTS_THREADS) +# Input arguments: none +# Return value: please return 0 if input arguments are valid, 1 otherwise +# The following variables are guaranteed to be defined: NCOPIES, NTHREADS, NEVENTS_THREAD +# (benchmark defaults) and USER_NCOPIES, USER_NTHREADS, USER_NEVENTS_THREADS (user inputs) +function validateInputArguments(){ + export CMSSW_RELEASE=CMSSW_11_1_0_pre8_Patatrack export VO_CMS_SW_DIR=/cvmfs/cms.cern.ch export LC_ALL=en_US.UTF-8 @@ -25,29 +60,23 @@ function doOne(){ pushd ${CMSSW_RELEASE}; eval `scramv1 runtime -sh`; popd # Configure WL copy - echo -e "\nRunning benchmark..." 
- echo ${BMKDIR} - ls ${BMKDIR} - pwd - ls + myecho "current dir is `pwd`" + myecho "files in `pwd` are" + ls -l cd ${BMKDIR}/patatrack-scripts + myecho "Moving to `pwd`" + myecho "Preparing configuration files" ./workflow.sh patch -b --forward profile.py profile.patch #change .../patatrack-scripts/sourceFromPixelRaw_cff.py and point "fed_prefix" to .../extraction/path/.../store/opendata/cms - sed -i "s|/data/store/opendata/cms|${install_dir}/store/opendata/cms|g" sourceFromPixelRaw_cff.py - - - ./benchmark profile.py >>$LOG 2>&1 3>&1 - status=${?} - echo "[doOne ($1)] $(date) completed (status=$status)" - # Return 0 if this workload copy was successful, 1 otherwise - return $status + sed -i "s|/data/store/opendata/cms|/bmk/data/store/opendata/cms|g" sourceFromPixelRaw_cff.py + myecho "Configuration file done" + return 0 } - # Default values for NCOPIES, NTHREADS, NEVENTS_THREAD must be set in each benchmark -NTHREADS= +NTHREADS=1 NCOPIES=1 NEVENTS_THREAD=10 if [ "$NCOPIES" -lt 1 ]; then # when $NTHREADS > nproc diff --git a/cms/patatrack/cms-patatrack/prepare-patch.sh b/cms/patatrack/cms-patatrack/prepare-patch.sh index d18daec..bf2fbac 100755 --- a/cms/patatrack/cms-patatrack/prepare-patch.sh +++ b/cms/patatrack/cms-patatrack/prepare-patch.sh @@ -1,5 +1,9 @@ #!/bin/env bash +# FIXME: THIS set of replaces should simply go in the hep-worklaods-gpu repo +# the dependency from the sciaba repo should go away, and possibly also the onte from patatrack-scripts +# or at least use a specific branch of patatrack-scripts + install_dir="/tmp/install" echo -e "\nCloning Patatrack repos into ${install_dir}..." 
-- GitLab From 3d66c273d4df97cc2af2c06c56b6abcee624785f Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 16:05:28 +0200 Subject: [PATCH 20/74] adding dry run for cvmfs tracing --- cms/patatrack/cms-patatrack-ci.yml | 47 +++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 63bd36b..0388189 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -3,7 +3,7 @@ stages: - build_0 - build_1 - build_2 -#- snapshot +- snapshot #- rebuild #- test @@ -11,6 +11,8 @@ stages: ## Templates ############# .definition_build_image: &template_build_image + tags: + - hep-workload-gpu-docker-builder image: # NB enable shared runners and do not specify a CI tag name: gitlab-registry.cern.ch/ci-tools/docker-image-builder # CERN version of the Kaniko image entrypoint: [""] @@ -38,8 +40,6 @@ stages: job_build_image_step0: stage: build_0 - tags: - - hep-workload-gpu-docker-builder before_script: - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.0 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack @@ -53,8 +53,6 @@ job_build_image_step0: job_build_image_step1: stage: build_1 - tags: - - hep-workload-gpu-docker-builder before_script: - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.1 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack @@ -68,8 +66,6 @@ job_build_image_step1: job_build_image_step2: stage: build_2 - tags: - - hep-workload-gpu-docker-builder before_script: - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.2 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack @@ -81,3 +77,40 @@ job_build_image_step2: - cms/patatrack/* - cms/patatrack/cms-patatrack/* - cms/patatrack/cms-patatrack/*/* + +job_snapshot_cvmfs: + stage: snapshot + tags: + - hep-workload-gpu-docker-builder + image: + name: 
gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa + before_script: + - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} + - export CIENV_CVMFSREPO=cms.cern.ch + - export CVMFS_IMAGE=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + - docker pull ${CVMFS_IMAGE} + - docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r ${CIENV_CVMFSREPO} -t /tmp/traces + script: + - sleep 1m # to give time to cvmfs to start + - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} + - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe + # Here comes the dry run of the CMS Patatrack container. Arguments are for the time being defaults/hardcoded FIXME + - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa + - docker exec cvmfs_${CI_JOB_ID} cvmfs_talk -i ${CIENV_CVMFSREPO} tracebuffer flush + - docker exec cvmfs_${CI_JOB_ID} python /usr/libexec/cvmfs/shrinkwrap/spec_builder.py --policy=exact /tmp/traces/cvmfs-${CIENV_CVMFSREPO}.trace.log /tmp/traces/cvmfs-${CIENV_CVMFSREPO}.spec + - docker cp cvmfs_${CI_JOB_ID}:/tmp/traces ${CI_PROJECT_DIR}/traces + after_script: + - docker rm -f cvmfs_${CI_JOB_ID} + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + changes: + - cms/patatrack/* + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/*/* + artifacts: + paths: + - ${CI_PROJECT_DIR}/traces + expire_in: 1 week + when: always \ No newline at end of file -- GitLab From 2b76f5055c70b28e8b1de2f80d8e18bd9abf0ced Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 16:07:01 +0200 Subject: [PATCH 21/74] adding dry run for cvmfs tracing --- cms/patatrack/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 0388189..82a2466 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -109,7 +109,7 @@ job_snapshot_cvmfs: - cms/patatrack/* - cms/patatrack/cms-patatrack/* - cms/patatrack/cms-patatrack/*/* - artifacts: + artifacts: paths: - ${CI_PROJECT_DIR}/traces expire_in: 1 week -- GitLab From ed20ca35f624f92a5fd0a7053dda775432a35b97 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 23:23:47 +0200 Subject: [PATCH 22/74] added empty provenance --- cms/patatrack/cvmfs/{.keepme => .provenance} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cms/patatrack/cvmfs/{.keepme => .provenance} (100%) diff --git a/cms/patatrack/cvmfs/.keepme b/cms/patatrack/cvmfs/.provenance similarity index 100% rename from cms/patatrack/cvmfs/.keepme rename to cms/patatrack/cvmfs/.provenance -- GitLab From 8497143116b6ae877b9213714e449411c5baa2f9 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 23:28:02 +0200 Subject: [PATCH 23/74] full chain patatrack --- cms/patatrack/cms-patatrack-ci.yml | 35 ++++++++++++++++++++++++------ cms/patatrack/nvidia.Dockerfile.2 | 2 +- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 82a2466..30275a4 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -4,7 +4,7 @@ stages: - build_1 - build_2 - snapshot -#- rebuild +- build_standalone #- test ########################## @@ -86,19 +86,24 @@ job_snapshot_cvmfs: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa before_script: - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} + - export CVMFS_EXPORT_DIR=${CI_PROJECT_DIR}/cms/patatrack - export CIENV_CVMFSREPO=cms.cern.ch - export 
CVMFS_IMAGE=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - docker pull ${CVMFS_IMAGE} - - docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r ${CIENV_CVMFSREPO} -t /tmp/traces + - docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CVMFS_EXPORT_DIR}:${CVMFS_EXPORT_DIR} -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r ${CIENV_CVMFSREPO} -t /tmp/traces script: - sleep 1m # to give time to cvmfs to start - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} + - export CVMFS_EXPORT_DIR=${CI_PROJECT_DIR}/cms/patatrack + # check cvmfs is running - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe # Here comes the dry run of the CMS Patatrack container. Arguments are for the time being defaults/hardcoded FIXME - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa - - docker exec cvmfs_${CI_JOB_ID} cvmfs_talk -i ${CIENV_CVMFSREPO} tracebuffer flush - - docker exec cvmfs_${CI_JOB_ID} python /usr/libexec/cvmfs/shrinkwrap/spec_builder.py --policy=exact /tmp/traces/cvmfs-${CIENV_CVMFSREPO}.trace.log /tmp/traces/cvmfs-${CIENV_CVMFSREPO}.spec - - docker cp cvmfs_${CI_JOB_ID}:/tmp/traces ${CI_PROJECT_DIR}/traces + # run shrinkwrapper + - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} + # remove duplicated data + - rm -rf ${CVMFS_EXPORT_DIR}/cvmfs/.data + - ls -R ${CVMFS_EXPORT_DIR} > ${CI_PROJECT_DIR}/cvmfs_export_dir_content after_script: - docker rm -f cvmfs_${CI_JOB_ID} only: @@ -111,6 +116,22 @@ job_snapshot_cvmfs: - cms/patatrack/cms-patatrack/*/* artifacts: paths: - - ${CI_PROJECT_DIR}/traces + - ${CI_PROJECT_DIR}/traces + - ${CI_PROJECT_DIR}/cvmfs_export_dir_content + - ${CI_PROJECT_DIR}/cms/patatrack/cvmfs expire_in: 1 week - when: always \ No newline at end of 
file + when: always + +job_build_standalone_image: + stage: build_standalone + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.2 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-bmk + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/* + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/*/* \ No newline at end of file diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index 25b13e7..a94f708 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -39,7 +39,7 @@ RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatr ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] # This contains provenance data that can never be cached -#COPY ./cvmfs.provenance /cvmfs.provenance #FIXME +COPY ./cvmfs/.provenance /cvmfs/.provenance # Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) RUN groupadd bmkuser -- GitLab From 57e8a23c7993dd645dbf5d6a5ba0be5722283a15 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Wed, 1 Jul 2020 23:51:04 +0200 Subject: [PATCH 24/74] fix Nthread --- cms/patatrack/cms-patatrack/bmk-driver.sh | 158 +++++++++--------- .../cms-patatrack/cms-patatrack-bmk.sh | 10 +- 2 files changed, 84 insertions(+), 84 deletions(-) diff --git a/cms/patatrack/cms-patatrack/bmk-driver.sh b/cms/patatrack/cms-patatrack/bmk-driver.sh index b119f06..0cdb9be 100644 --- a/cms/patatrack/cms-patatrack/bmk-driver.sh +++ b/cms/patatrack/cms-patatrack/bmk-driver.sh @@ -1,20 +1,24 @@ -if [ "$BASH_SOURCE" = "$0" ]; then echo "ERROR! This script ($0) was not sourced"; exit 1; fi -if [ "$BASH_SOURCE" = "" ]; then echo "ERROR! This script was not sourced from bash"; return 1; fi +if [ "$BASH_SOURCE" = "$0" ]; then myecho "ERROR! 
This script ($0) was not sourced"; exit 1; fi +if [ "$BASH_SOURCE" = "" ]; then myecho "ERROR! This script was not sourced from bash"; return 1; fi bmkDriver=$(basename ${BASH_SOURCE}) bmkScript=$(basename $0) BMKDIR=$(cd $(dirname $0); pwd) +function myecho(){ + echo -e "[${FUNCNAME[1]}] $@" +} + function advertise_bmkdriver(){ - echo -e "\n========================================================================" - echo -e "[$bmkDriver] $(date) entering common benchmark driver" - echo -e "========================================================================\n" - echo -e "[$bmkDriver] entering from $bmkScript\n" + myecho "\n========================================================================" + myecho "[$bmkDriver] $(date) entering common benchmark driver" + myecho "========================================================================\n" + myecho "[$bmkDriver] entering from $bmkScript\n" # Dump workload-specific directory - echo -e "[$bmkDriver] benchmark directory BMKDIR=${BMKDIR}:\n" + myecho "[$bmkDriver] benchmark directory BMKDIR=${BMKDIR}:\n" ls -lRt $BMKDIR if [ -d $BMKDIR/../data ]; then - echo -e "\n[$bmkDriver] data directory ${BMKDIR}/../data:\n" + myecho "\n[$bmkDriver] data directory ${BMKDIR}/../data:\n" ls -lRt $BMKDIR/../data fi echo @@ -24,21 +28,21 @@ function advertise_bmkdriver(){ function check_mandatory_functions(){ # Check that function doOne has been defined if [ "$(type -t doOne)" != "function" ]; then - echo "[$bmkDriver] ERROR! Function 'doOne' must be defined in $bmkScript" # internal error (missing code) + myecho "[$bmkDriver] ERROR! 
Function 'doOne' must be defined in $bmkScript" # internal error (missing code) exit 1; fi # Check that function parseResults has been defined, otherwise load it from parseResults.sh if [ "$(type -t parseResults)" != "function" ]; then - echo "[$bmkDriver] load parseResults.sh (function 'parseResults' is not defined in $bmkScript)" + myecho "[$bmkDriver] load parseResults.sh (function 'parseResults' is not defined in $bmkScript)" if [ -f ${BMKDIR}/parseResults.sh ]; then - echo -e "[$bmkDriver] sourcing ${BMKDIR}/parseResults.sh\n" + myecho "[$bmkDriver] sourcing ${BMKDIR}/parseResults.sh\n" . ${BMKDIR}/parseResults.sh if [ "$(type -t parseResults)" != "function" ]; then - echo "[$bmkDriver] ERROR! Function 'parseResults' must be defined in $bmkScript or parseResults.sh" # internal error (missing code) + myecho "[$bmkDriver] ERROR! Function 'parseResults' must be defined in $bmkScript or parseResults.sh" # internal error (missing code) exit 1; fi else - echo -e "[$bmkDriver] ERROR! 'parseResults' not defined and ${BMKDIR}/parseResults.sh not found\n" # internal error (missing code) + myecho "[$bmkDriver] ERROR! 'parseResults' not defined and ${BMKDIR}/parseResults.sh not found\n" # internal error (missing code) exit 1 fi fi @@ -49,7 +53,7 @@ function check_mandatory_variables(){ # Variables NCOPIES, NTHREADS, NEVENTS_THREAD have default values specific to each benchmark for var in NCOPIES NTHREADS NEVENTS_THREAD; do if [ "${!var}" == "" ]; then - echo "[$bmkDriver] ERROR! A default value of $var must be set in $bmkScript" # internal error (missing code) + myecho "[$bmkDriver] ERROR! 
A default value of $var must be set in $bmkScript" # internal error (missing code) exit 1; fi done @@ -69,15 +73,15 @@ DEBUG=0 function advertise_user_defined_variables(){ for var in NCOPIES NTHREADS NEVENTS_THREAD; do - echo "Default (from $bmkScript): $var=${!var}" + myecho "Default (from $bmkScript): $var=${!var}" done echo for var in USER_NCOPIES USER_NTHREADS USER_NEVENTS_THREAD; do - echo "Default (from $bmkDriver): $var=${!var}" + myecho "Default (from $bmkDriver): $var=${!var}" done echo for var in resultsDir skipSubDir DEBUG; do - echo "Default (from $bmkDriver): $var=${!var}" + myecho "Default (from $bmkDriver): $var=${!var}" done } @@ -113,10 +117,10 @@ function usage(){ echo "Without -w (default) and with -W: <resultsDir> is a tmp directory /tmp/xxxx" echo "" if [ "$(type -t usage_detailed)" == "function" ]; then - echo -e "\nDetailed Usage:\n----------------\n" + echo "\nDetailed Usage:\n----------------\n" ( usage_detailed ) # as a subprocess, just in case this has a 0 exit code... fi - echo -e "DESCRIPTION\n" + echo "DESCRIPTION\n" if [ -e $BMKDIR/DESCRIPTION ]; then cat $BMKDIR/DESCRIPTION else @@ -138,7 +142,7 @@ while getopts "c:t:e:w:Wdh" o; do if [ $OPTARG -gt 0 ]; then USER_NCOPIES=$OPTARG else - echo "[$bmkDriver] ERROR! Invalid argument '-c $OPTARG' (must be > 0)" + myecho "[$bmkDriver] ERROR! Invalid argument '-c $OPTARG' (must be > 0)" exit 1 # early termination (invalid arguments to benchmark script) fi ;; @@ -146,11 +150,11 @@ while getopts "c:t:e:w:Wdh" o; do if [ $OPTARG -gt 0 ]; then USER_NTHREADS=$OPTARG if [ $NTHREADS -eq 1 ] && [ $USER_NTHREADS -ne 1 ]; then - echo "[$bmkDriver] ERROR! Invalid argument '-t $OPTARG' (default NTHREADS=1 cannot be changed)" + myecho "[$bmkDriver] ERROR! Invalid argument '-t $OPTARG' (default NTHREADS=1 cannot be changed)" exit 1 # early termination (invalid arguments to benchmark script) fi else - echo "[$bmkDriver] ERROR! Invalid argument '-t $OPTARG' (must be > 0)" + myecho "[$bmkDriver] ERROR! 
Invalid argument '-t $OPTARG' (must be > 0)" exit 1 # early termination (invalid arguments to benchmark script) fi ;; @@ -158,7 +162,7 @@ while getopts "c:t:e:w:Wdh" o; do if [ $OPTARG -gt 0 ]; then USER_NEVENTS_THREAD=$OPTARG else - echo "[$bmkDriver] ERROR! Invalid argument '-e $OPTARG' (must be > 0)" + myecho "[$bmkDriver] ERROR! Invalid argument '-e $OPTARG' (must be > 0)" exit 1 fi ;; @@ -178,7 +182,7 @@ while getopts "c:t:e:w:Wdh" o; do done if [ "$DEBUG" == 1 ]; then - echo -e "\n[$bmkDriver] Parse input arguments '$@'\n" + myecho "\n[$bmkDriver] Parse input arguments '$@'\n" advertise_bmkdriver advertise_user_defined_variables fi @@ -197,11 +201,11 @@ check_mandatory_variables # Dump all relevant variables after parsing the input arguments for var in USER_NCOPIES USER_NTHREADS USER_NEVENTS_THREAD; do - echo "Current value: $var=${!var}" + myecho "Current value: $var=${!var}" done echo for var in resultsDir skipSubDir DEBUG; do - echo "Current value: $var=${!var}" + myecho "Current value: $var=${!var}" done echo @@ -211,10 +215,10 @@ if [ "${resultsDir}" == "" ]; then ###echo "[$bmkDriver] ERROR! resultsDir not specified ('-w' missing)" ###exit 1 # early termination (invalid arguments to benchmark script) if [ "$skipSubDir" == "1" ]; then - echo -e "[$bmkDriver] WARNING! resultsDir not specified ('-w' missing), but '-W' is present: create a directory in /tmp\n" + myecho "[$bmkDriver] WARNING! resultsDir not specified ('-w' missing), but '-W' is present: create a directory in /tmp\n" resultsDir=$(mktemp -d) else - echo -e "[$bmkDriver] WARNING! resultsDir not specified ('-w' missing) and '-W' is missing: assume '/results'\n" + myecho "[$bmkDriver] WARNING! resultsDir not specified ('-w' missing) and '-W' is missing: assume '/results'\n" resultsDir=/results fi fi @@ -223,7 +227,7 @@ fi if [ ! -d ${resultsDir} ]; then mkdir -p ${resultsDir} if [ "$?" != "0" ]; then - echo "[$bmkDriver] ERROR! 
directory '${resultsDir}' not found and could not be created" + myecho "[$bmkDriver] ERROR! directory '${resultsDir}' not found and could not be created" exit 1 # early termination (cannot start processing) fi fi @@ -236,25 +240,25 @@ fail=0 # Call function validateInputArguments if it exists if [ "$(type -t validateInputArguments)" != "function" ]; then - echo -e "[$bmkDriver] function 'validateInputArguments' not found: use input arguments as given\n" + myecho "[$bmkDriver] function 'validateInputArguments' not found: use input arguments as given\n" if [ "$USER_NCOPIES" != "" ]; then NCOPIES=$USER_NCOPIES; fi if [ "$USER_NTHREADS" != "" ]; then NTHREADS=$USER_NTHREADS; fi # already checked that USER_NTHREADS must be 1 if NTHREADS is 1 if [ "$USER_NEVENTS_THREAD" != "" ]; then NEVENTS_THREAD=$USER_NEVENTS_THREAD; fi else - echo -e "[$bmkDriver] function 'validateInputArguments' starting\n" + myecho "[$bmkDriver] function 'validateInputArguments' starting\n" if ! validateInputArguments; then fail=-1; fi - echo -e "\n[$bmkDriver] function 'validateInputArguments' completed (status=$fail)\n" + myecho "\n[$bmkDriver] function 'validateInputArguments' completed (status=$fail)\n" fi # Set baseWDir and create it if necessary if [ "$skipSubDir" == "1" ]; then baseWDir=${resultsDir} - echo -e "[$bmkDriver] base working directory : $baseWDir\n" + myecho "[$bmkDriver] base working directory : $baseWDir\n" else baseWDir=${resultsDir}/$(basename $0 -bmk.sh)-c${NCOPIES}-e${NEVENTS_THREAD}-$(date +%s)_$(((RANDOM%9000)+1000)) - echo -e "[$bmkDriver] base working directory : $baseWDir\n" + myecho "[$bmkDriver] base working directory : $baseWDir\n" if ! mkdir $baseWDir; then - echo "[$bmkDriver] ERROR! directory '${baseWDir}' cannot be created" + myecho "[$bmkDriver] ERROR! 
directory '${baseWDir}' cannot be created" exit 1 # early termination (cannot start processing) fi fi @@ -265,11 +269,11 @@ baseWDir=$(cd $baseWDir; pwd) touch $baseWDir/inputs.log for var in NCOPIES NTHREADS NEVENTS_THREAD; do if [ "${!var}" == "" ] || ! [[ ${!var} =~ ^[0-9]+$ ]] || [ ! ${!var} -gt 0 ]; then - echo "[$bmkDriver] ERROR! Invalid value $var=${!var}" + myecho "[$bmkDriver] ERROR! Invalid value $var=${!var}" exit 1; fi - echo "Current value: $var=${!var}" - echo "$var=${!var}" >> $baseWDir/inputs.log + myecho "Current value: $var=${!var}" + myecho "$var=${!var}" >> $baseWDir/inputs.log done echo @@ -280,47 +284,47 @@ fi # Define APP before doOne (BMK-152) and parseResults APP=$(basename ${BMKDIR}) # or equivalently here $(basename $0 -bmk.sh) -echo -e "[$bmkDriver] APP=${APP}\n" +myecho "[$bmkDriver] APP=${APP}\n" # Wrapper for the doOne function function doOneWrapper(){ if [ "$1" == "" ] || [ "$2" != "" ]; then - echo -e "[$bmkDriver] ERROR! Invalid arguments '$@' to doOneWrapper" # internal error (inconsistent code) + myecho "[$bmkDriver] ERROR! Invalid arguments '$@' to doOneWrapper" # internal error (inconsistent code) return 1 # NB: return or exit are equivalent here because doOneWrapper is executed as a subprocess fi - echo -e "\n[doOneWrapper ($1)] $(date) : process $1 started" + myecho "\n[doOneWrapper ($1)] $(date) : process $1 started" ###sleep 5 # this is not needed if the list of jobs is compiled from all '$!' workDir=$(pwd)/proc_$1 # current directory is $baseWDir here - echo -e "[doOneWrapper ($1)] workdir is ${workDir}" + myecho "[doOneWrapper ($1)] workdir is ${workDir}" if ! mkdir -p $workDir || ! cd $workDir; then - echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create workdir)\n" + myecho "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create workdir)\n" return 1 fi log=${workDir}/doOneWrapper_$1.log - echo -e "[doOneWrapper ($1)] logfile is $log" + myecho "[doOneWrapper ($1)] logfile is $log" if ! 
touch $log ; then - echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create logfile)\n" + myecho "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create logfile)\n" return 1 fi - echo -e "[doOneWrapper ($1)] $(date) : process $1 configured" 2>&1 | tee -a $log # configured means that log exists + myecho "[doOneWrapper ($1)] $(date) : process $1 configured" 2>&1 | tee -a $log # configured means that log exists mkdir $workDir/HOME export HOME=$workDir/HOME # avoid writing to /root in read-only docker or to host HOME in singularity (BMK-166) - echo -e "[doOneWrapper ($1)] HOME=$HOME" 2>&1 | tee -a $log + myecho "[doOneWrapper ($1)] HOME=$HOME" 2>&1 | tee -a $log cd -P /proc/self && basename $PWD | ( read thispid; \ - echo -e "[doOneWrapper ($1)] current process pid is $thispid" 2>&1 | tee -a $log ) # see https://stackoverflow.com/a/15170225 + myecho "[doOneWrapper ($1)] current process pid is $thispid" 2>&1 | tee -a $log ) # see https://stackoverflow.com/a/15170225 cd - > /dev/null local pid=$(cat $log | grep "current process pid is" | sed -e "s/.*current process pid is //") local parsertest=0 # hardcoded: 0 => doOne (default); 1 => test the parser on old logs and bypass doOne (BMK-152) if [ $parsertest -eq 0 ]; then if [ "$(whoami)" == "root" ] && cat /proc/self/cgroup | cut -d/ -f2 | grep docker > /dev/null; then - echo -e "[doOneWrapper ($1)] inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log + myecho "[doOneWrapper ($1)] inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log export -f doOne chown -R bmkuser:bmkuser $workDir 2>&1 | tee -a $log su bmkuser -s /bin/bash -c "doOne $1" 2>&1 | tee -a $log local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! 
chown -R root:root $workDir 2>&1 | tee -a $log else - echo -e "[doOneWrapper ($1)] not inside docker - run doOne as $(whoami)\n" 2>&1 | tee -a $log + myecho "[doOneWrapper ($1)] not inside docker - run doOne as $(whoami)\n" 2>&1 | tee -a $log doOne $1 2>&1 | tee -a $log local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! fi @@ -328,13 +332,13 @@ function doOneWrapper(){ cp -dpr $BMKDIR/jobs/refjob/proc_$1/* . local status=$? \rm -f *${APP}*.json - echo -e "[doOneWrapper ($1)] DUMMY doOne: copy old logs for parser tests (BMK-152)" + myecho "[doOneWrapper ($1)] DUMMY doOne: copy old logs for parser tests (BMK-152)" fi if [ "$status" == "0" ]; then - echo -e "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) completed ok\n" 2>&1 | tee -a $log + myecho "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) completed ok\n" 2>&1 | tee -a $log return 0 else - echo -e "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) failed\n" 2>&1 | tee -a $log + myecho "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) failed\n" 2>&1 | tee -a $log return 1 fi } @@ -348,21 +352,21 @@ done if [ $fail -eq 0 ]; then # Spawn subprocesses (and keep track of their list of them using '$!') - echo -e "------------------------------------------------------------------------" - echo -e "[$bmkDriver] spawn $NCOPIES processes" - echo -e "------------------------------------------------------------------------\n" + myecho "------------------------------------------------------------------------" + myecho "[$bmkDriver] spawn $NCOPIES processes" + myecho "------------------------------------------------------------------------\n" jobs="" for i in $(seq 1 $NCOPIES); do ( cd $baseWDir; doOneWrapper $i ) & ipid=$! 
- [ $DEBUG -gt 0 ] && echo -e "[$bmkDriver] spawned process $i with pid $ipid" + [ $DEBUG -gt 0 ] && myecho "[$bmkDriver] spawned process $i with pid $ipid" jobs="$jobs $ipid" sleep 0.1 # stagger job creation by 100ms done # Wait for all subprocesses to complete and check their exit codes # [NB: do not use 'jobs -p': some jobs may be missing if already completed] - [ $DEBUG -gt 0 ] && echo -e "\n[$bmkDriver] $(date) ... waiting for spawned processes with pid's$jobs\n" + [ $DEBUG -gt 0 ] && myecho "\n[$bmkDriver] $(date) ... waiting for spawned processes with pid's$jobs\n" wait $jobs > /dev/null 2>&1 fail=0 # unnecessary but harmless (this code is only executed if $fail -eq 0) for i in $(seq 1 $NCOPIES); do @@ -370,17 +374,17 @@ if [ $fail -eq 0 ]; then let "fail+=1" fi done - echo -e "\n------------------------------------------------------------------------" + myecho "\n------------------------------------------------------------------------" if [ $fail -gt 0 ]; then - echo "[$bmkDriver] ERROR! $fail processes failed (out of $NCOPIES)" + myecho "[$bmkDriver] ERROR! 
$fail processes failed (out of $NCOPIES)" else - echo "[$bmkDriver] all $NCOPIES processes completed successfully" + myecho "[$bmkDriver] all $NCOPIES processes completed successfully" fi - echo -e "------------------------------------------------------------------------\n" + myecho "------------------------------------------------------------------------\n" # Skip the doOne step if validateInputArguments failed else - echo -e "[$bmkDriver] validateInputArguments failed: skip doOne processing" + myecho "[$bmkDriver] validateInputArguments failed: skip doOne processing" fi # Parse results and generate summary using function parseResults @@ -389,36 +393,36 @@ fi # - if a separate function generateSummary exists, it must be internally called by parseResults # - the environment variable APP=<vo>-<workload> defines the name of the json file ${APP}_summary.json cd $baseWDir -echo -e "[$bmkDriver] parse results and generate summary: starting" -echo -e "[$bmkDriver] current directory : $(pwd)\n" +myecho "[$bmkDriver] parse results and generate summary: starting" +myecho "[$bmkDriver] current directory : $(pwd)\n" parseResults $fail parse=$? -echo -e "\n[$bmkDriver] parse results and generate summary: completed (status=$parse)" +myecho "\n[$bmkDriver] parse results and generate summary: completed (status=$parse)" # Validate json files syntax (BMK-137) cd $baseWDir -echo -e "\n[$bmkDriver] json file validation: starting" +myecho "\n[$bmkDriver] json file validation: starting" json=0 jsonFile=$baseWDir/${APP}_summary.json jsonFile_new=$baseWDir/${APP}_summary_new.json if [ ! -f ${jsonFile} ]; then - echo -e "[$bmkDriver] ERROR! json file '${jsonFile}' not found" + myecho "[$bmkDriver] ERROR! json file '${jsonFile}' not found" json=1 else - echo "[$bmkDriver] lint json file '${jsonFile}' syntax using jq" + myecho "[$bmkDriver] lint json file '${jsonFile}' syntax using jq" if ! jq '.' 
-c < ${jsonFile}; then - echo "[$bmkDriver] json file '${jsonFile}' lint validation failed" + myecho "[$bmkDriver] json file '${jsonFile}' lint validation failed" json=1 fi fi if [ -f ${jsonFile_new} ]; then - echo "[$bmkDriver] lint json file '${jsonFile_new}' syntax using jq" + myecho "[$bmkDriver] lint json file '${jsonFile_new}' syntax using jq" if ! jq '.' -c < ${jsonFile_new}; then - echo "[$bmkDriver] json file '${jsonFile_new}' lint validation failed" + myecho "[$bmkDriver] json file '${jsonFile_new}' lint validation failed" json=1 fi fi -echo -e "[$bmkDriver] json file validation: completed (status=$json)\n" +myecho "[$bmkDriver] json file validation: completed (status=$json)\n" # NB: This script is meant to be sourced, it does not return or exit at the end if [ $parse -ne 0 ] || [ $fail -ne 0 ] || [ $json -ne 0 ]; then @@ -426,8 +430,8 @@ if [ $parse -ne 0 ] || [ $fail -ne 0 ] || [ $json -ne 0 ]; then else bmkStatus=0 fi -echo -e "[$bmkDriver] exiting back to $bmkScript" -echo -e "\n========================================================================" -echo -e "[$bmkDriver] $(date) exiting common benchmark driver (status=$bmkStatus)" -echo -e "========================================================================\n" +myecho "[$bmkDriver] exiting back to $bmkScript" +myecho "\n========================================================================" +myecho "[$bmkDriver] $(date) exiting common benchmark driver (status=$bmkStatus)" +myecho "========================================================================\n" exit $bmkStatus diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index cc55d92..e0bded6 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -6,10 +6,6 @@ #set -e # immediate exit on error -function myecho(){ - echo -e "[${FUNCNAME[1]}] $@" -} - # Function doOne must be defined in each benchmark # Input argument 
$1: process index (between 1 and $NCOPIES) # Return value: please return 0 if this workload copy was successful, 1 otherwise @@ -76,9 +72,9 @@ function validateInputArguments(){ } # Default values for NCOPIES, NTHREADS, NEVENTS_THREAD must be set in each benchmark -NTHREADS=1 -NCOPIES=1 -NEVENTS_THREAD=10 +export NTHREADS=8 +export NCOPIES=1 +export NEVENTS_THREAD=-1 if [ "$NCOPIES" -lt 1 ]; then # when $NTHREADS > nproc NCOPIES=1 NTHREADS=`nproc` -- GitLab From 975ca3f7842bcf2d9c09352d5819ea9454a167ca Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 00:07:08 +0200 Subject: [PATCH 25/74] explicit dummy args --- cms/patatrack/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/patatrack/cms-patatrack-ci.yml index 30275a4..3ee3bdf 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/patatrack/cms-patatrack-ci.yml @@ -98,7 +98,7 @@ job_snapshot_cvmfs: # check cvmfs is running - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe # Here comes the dry run of the CMS Patatrack container. 
Arguments are for the time being defaults/hardcoded FIXME - - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa + - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -t 8 -c 1 -e 100 # run shrinkwrapper - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} # remove duplicated data -- GitLab From 7786dbc1b53075408936ae465be492c7d18a3523 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 00:26:40 +0200 Subject: [PATCH 26/74] fix image pull --- .gitlab-ci.yml | 2 +- cms/{patatrack => }/cms-patatrack-ci.yml | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) rename cms/{patatrack => }/cms-patatrack-ci.yml (95%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9ee7b79..dd0005d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -44,7 +44,7 @@ patatrack: stage: triggers trigger: include: - - local: cms/patatrack/cms-patatrack-ci.yml + - local: cms/cms-patatrack-ci.yml strategy: depend only: variables: diff --git a/cms/patatrack/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml similarity index 95% rename from cms/patatrack/cms-patatrack-ci.yml rename to cms/cms-patatrack-ci.yml index 3ee3bdf..77a8461 100644 --- a/cms/patatrack/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -98,7 +98,8 @@ job_snapshot_cvmfs: # check cvmfs is running - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe # Here comes the dry run of the CMS Patatrack container. 
Arguments are for the time being defaults/hardcoded FIXME - - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -t 8 -c 1 -e 100 + - docker pull gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa + - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa # run shrinkwrapper - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} # remove duplicated data @@ -110,10 +111,10 @@ job_snapshot_cvmfs: variables: - $CI_COMMIT_BRANCH =~ /^qa.*$/ - $CI_COMMIT_TAG =~ /^v.*$/ - changes: - - cms/patatrack/* - - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/*/* + #changes: + # - cms/patatrack/* + # - cms/patatrack/cms-patatrack/* + # - cms/patatrack/cms-patatrack/*/* artifacts: paths: - ${CI_PROJECT_DIR}/traces -- GitLab From 0620c99719dac5482e894dc02b9c9cc7e3d79657 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 00:31:25 +0200 Subject: [PATCH 27/74] pass arguments --- cms/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 77a8461..c8090e8 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -99,7 +99,7 @@ job_snapshot_cvmfs: - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe # Here comes the dry run of the CMS Patatrack container. 
Arguments are for the time being defaults/hardcoded FIXME - docker pull gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa - - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa + - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -e 100 -t 8 -c 1 # run shrinkwrapper - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} # remove duplicated data -- GitLab From 85737603a67b75b2371792992bffb71cbbd9b1fc Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 00:36:31 +0200 Subject: [PATCH 28/74] change default dummy args --- cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index e0bded6..bd4cfb4 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -72,9 +72,9 @@ function validateInputArguments(){ } # Default values for NCOPIES, NTHREADS, NEVENTS_THREAD must be set in each benchmark -export NTHREADS=8 -export NCOPIES=1 -export NEVENTS_THREAD=-1 +NTHREADS=8 +NCOPIES=1 +NEVENTS_THREAD=10 if [ "$NCOPIES" -lt 1 ]; then # when $NTHREADS > nproc NCOPIES=1 NTHREADS=`nproc` -- GitLab From 6fe11b3278e9196293f5e9335466d5d36010a06f Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 00:54:31 +0200 Subject: [PATCH 29/74] myecho fix --- cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index bd4cfb4..d51a059 
100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -6,6 +6,11 @@ #set -e # immediate exit on error + +function myecho(){ + echo -e "[${FUNCNAME[1]}] $@" +} + # Function doOne must be defined in each benchmark # Input argument $1: process index (between 1 and $NCOPIES) # Return value: please return 0 if this workload copy was successful, 1 otherwise -- GitLab From 0d03d82842dc295ae2b7086d2142580ddf3803cf Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 00:58:00 +0200 Subject: [PATCH 30/74] jq still not available --- cms/patatrack/nvidia.Dockerfile.0 | 1 - 1 file changed, 1 deletion(-) diff --git a/cms/patatrack/nvidia.Dockerfile.0 b/cms/patatrack/nvidia.Dockerfile.0 index ea44e6a..22d0994 100644 --- a/cms/patatrack/nvidia.Dockerfile.0 +++ b/cms/patatrack/nvidia.Dockerfile.0 @@ -12,7 +12,6 @@ RUN yum install -y \ perl perl-Data-Dumper \ patch git vim; yum clean all - RUN yum --enablerepo=extras install epel-release -y RUN yum install -y python2-pip RUN pip install --upgrade pip -- GitLab From 193066dcde450d975528393168c9075b4be079e1 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 09:06:43 +0200 Subject: [PATCH 31/74] skip temporarly parsing and json generation --- cms/patatrack/cms-patatrack/bmk-driver.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cms/patatrack/cms-patatrack/bmk-driver.sh b/cms/patatrack/cms-patatrack/bmk-driver.sh index 0cdb9be..5828de9 100644 --- a/cms/patatrack/cms-patatrack/bmk-driver.sh +++ b/cms/patatrack/cms-patatrack/bmk-driver.sh @@ -387,6 +387,12 @@ else myecho "[$bmkDriver] validateInputArguments failed: skip doOne processing" fi +myecho '''FIXME bmkDriver is forced to exit here, + the parsing of results should be implemented + and this exit point removed + ''' +exit 0 #FIXME + # Parse results and generate summary using function parseResults # - parseResults 
is started in the base working directoy # - the number of failed jobs is passed to parseResults as input parameter -- GitLab From 010f5f1c0fac42ba40fdd3d0e48c2f4163fd3553 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 16:29:09 +0200 Subject: [PATCH 32/74] remove bmkuser swap --- cms/patatrack/cms-patatrack/bmk-driver.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cms/patatrack/cms-patatrack/bmk-driver.sh b/cms/patatrack/cms-patatrack/bmk-driver.sh index 5828de9..e711aeb 100644 --- a/cms/patatrack/cms-patatrack/bmk-driver.sh +++ b/cms/patatrack/cms-patatrack/bmk-driver.sh @@ -1,5 +1,5 @@ -if [ "$BASH_SOURCE" = "$0" ]; then myecho "ERROR! This script ($0) was not sourced"; exit 1; fi -if [ "$BASH_SOURCE" = "" ]; then myecho "ERROR! This script was not sourced from bash"; return 1; fi +if [ "$BASH_SOURCE" = "$0" ]; then echo "ERROR! This script ($0) was not sourced"; exit 1; fi +if [ "$BASH_SOURCE" = "" ]; then echo "ERROR! This script was not sourced from bash"; return 1; fi bmkDriver=$(basename ${BASH_SOURCE}) bmkScript=$(basename $0) @@ -316,18 +316,18 @@ function doOneWrapper(){ local pid=$(cat $log | grep "current process pid is" | sed -e "s/.*current process pid is //") local parsertest=0 # hardcoded: 0 => doOne (default); 1 => test the parser on old logs and bypass doOne (BMK-152) if [ $parsertest -eq 0 ]; then - if [ "$(whoami)" == "root" ] && cat /proc/self/cgroup | cut -d/ -f2 | grep docker > /dev/null; then - myecho "[doOneWrapper ($1)] inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log - export -f doOne - chown -R bmkuser:bmkuser $workDir 2>&1 | tee -a $log - su bmkuser -s /bin/bash -c "doOne $1" 2>&1 | tee -a $log - local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! 
- chown -R root:root $workDir 2>&1 | tee -a $log - else + # if [ "$(whoami)" == "root" ] && cat /proc/self/cgroup | cut -d/ -f2 | grep docker > /dev/null; then + # myecho "[doOneWrapper ($1)] inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log + # export -f doOne + # chown -R bmkuser:bmkuser $workDir 2>&1 | tee -a $log + # su bmkuser -s /bin/bash -c "doOne $1" 2>&1 | tee -a $log + # local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! + # chown -R root:root $workDir 2>&1 | tee -a $log + # else myecho "[doOneWrapper ($1)] not inside docker - run doOne as $(whoami)\n" 2>&1 | tee -a $log doOne $1 2>&1 | tee -a $log local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee! - fi + # fi else cp -dpr $BMKDIR/jobs/refjob/proc_$1/* . local status=$? -- GitLab From db23ab262fe40ddc4405dd679345aaed7011df09 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 16:35:40 +0200 Subject: [PATCH 33/74] make cvmfs read only in the image --- cms/patatrack/nvidia.Dockerfile.2 | 1 + 1 file changed, 1 insertion(+) diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index a94f708..b863b55 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -15,6 +15,7 @@ FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack- # This should normally contain always the same files and be cacheable (BMK-159) COPY ./cvmfs /cvmfs +RUN chmod -R 555 /cvmfs RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar -- GitLab From cc2f31c75356da98648fb720ac50a44660aa2ce7 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 16:36:07 +0200 Subject: [PATCH 34/74] add test stage --- cms/cms-patatrack-ci.yml | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git 
a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index c8090e8..9a2a8c2 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -5,7 +5,9 @@ stages: - build_2 - snapshot - build_standalone -#- test +- test +#- publish +#- announce ########################## ## Templates ############# @@ -135,4 +137,31 @@ job_build_standalone_image: changes: - cms/patatrack/* - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/*/* \ No newline at end of file + - cms/patatrack/cms-patatrack/*/* + +job_test_standalone_image: + stage: test + tags: + - hep-workload-gpu-docker-builder + image: + name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa + script: + - export RESULTS_DIR=/scratch/results/CI-JOB-${CI_JOB_ID} + - export IMAGE_NAME=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-bmk:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + - docker pull ${IMAGE_NAME} + # Here comes the test run of the CMS Patatrack standalone container. Arguments are for the time being defaults/hardcoded FIXME + - docker run --rm --gpus '"device=0"' -v ${RESULTS_DIR}:/results ${IMAGE_NAME} -e 100 -t 8 -c 1 + - mv ${RESULTS_DIR} ${CI_PROJECT_DIR}/. 
+ only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + #changes: + # - cms/patatrack/* + # - cms/patatrack/cms-patatrack/* + # - cms/patatrack/cms-patatrack/*/* + artifacts: + paths: + - ${CI_PROJECT_DIR}/${RESULTS_DIR} + expire_in: 1 week + when: always \ No newline at end of file -- GitLab From 301a31bda2352300676e14743ff10d8e39578233 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 16:37:29 +0200 Subject: [PATCH 35/74] set LC_ALL encoding --- cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index d51a059..4f03b4a 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -85,6 +85,8 @@ if [ "$NCOPIES" -lt 1 ]; then # when $NTHREADS > nproc NTHREADS=`nproc` fi +export LC_ALL=en_US.UTF-8 + # Source the common benchmark driver if [ -f $(dirname $0)/bmk-driver.sh ]; then . $(dirname $0)/bmk-driver.sh -- GitLab From 849fa508fac3d826a9db53ffcb2e5fc71f75d11c Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 18:13:35 +0200 Subject: [PATCH 36/74] initial readme instructions --- cms/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 cms/README.md diff --git a/cms/README.md b/cms/README.md new file mode 100644 index 0000000..74a5c07 --- /dev/null +++ b/cms/README.md @@ -0,0 +1,19 @@ +# CMS GPU workloads + +The sub-folders contain workloads provided by the CMS experiment that run on CPU+GPU system. 
+The reconstruction package is known as CMS Patratrack and is published in https://github.com/cms-patatrack + +We use it to build a CPU+GPU benchmark workload, following the same approaches developed for the HEP-workloads targetting CPUs [HEP workloads](https://gitlab.cern.ch/hep-benchmarks/hep-workloads) +The purpose of this hep-workloads-gpu gitlab project is to build standalone container including software, data and orchestrator procedures needed to run the CMS workload as a benchmark. +For this purpose a limited set of events is used to run the reconstruction workload and measure the performance in terms of event throughput. + +The procedure to build the standalone container is documented in the gitlab CI [yml file](https://gitlab.cern.ch/hep-benchmarks/hep-workloads-gpu/-/blob/qa/cms/cms-patatrack-ci.yml) + +In order to run the standalone container follow these steps and look for results in the defined RESULTS_DIR + +``` +export RESULTS_DIR=/any_path_you_like +export IMAGE_NAME=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-bmk:qa +docker pull ${IMAGE_NAME} +docker run --rm --gpus '"device=0"' -v ${RESULTS_DIR}:/results ${IMAGE_NAME} +``` \ No newline at end of file -- GitLab From 4e80d526cff640bf22e71b75ac0b689989bc70a1 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Thu, 2 Jul 2020 22:36:19 +0200 Subject: [PATCH 37/74] remove scipy numpy should ocme from cvmfs --- cms/patatrack/nvidia.Dockerfile.0 | 3 --- 1 file changed, 3 deletions(-) diff --git a/cms/patatrack/nvidia.Dockerfile.0 b/cms/patatrack/nvidia.Dockerfile.0 index 22d0994..cd42d72 100644 --- a/cms/patatrack/nvidia.Dockerfile.0 +++ b/cms/patatrack/nvidia.Dockerfile.0 @@ -13,7 +13,4 @@ RUN yum install -y \ patch git vim; yum clean all RUN yum --enablerepo=extras install epel-release -y -RUN yum install -y python2-pip -RUN pip install --upgrade pip -RUN pip install numpy scipy -- GitLab From 5e6abff4aea9cdd2c8dacab0819359f04284fb38 
Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Fri, 3 Jul 2020 22:52:45 +0200 Subject: [PATCH 38/74] patch for scipy problem with cvmfs shrinkwrapper --- cms/cms-patatrack-ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 9a2a8c2..581350f 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -97,13 +97,18 @@ job_snapshot_cvmfs: - sleep 1m # to give time to cvmfs to start - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} - export CVMFS_EXPORT_DIR=${CI_PROJECT_DIR}/cms/patatrack + - echo "CVMFS_EXPORT_DIR is $CVMFS_EXPORT_DIR" # check cvmfs is running - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe # Here comes the dry run of the CMS Patatrack container. Arguments are for the time being defaults/hardcoded FIXME - docker pull gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa - - docker run --rm --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -e 100 -t 8 -c 1 + - docker run --name patatrack_container --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -e 100 -t 8 -c 1 # run shrinkwrapper - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} + # FIXME this is a dirty patch needed to make scipy running. cvmfs shrinkwrapper alone does not copy all files of that dir. 
To be investigated why + - ls -lR ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 + - rm -fr ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages + - docker cp /cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 # remove duplicated data - rm -rf ${CVMFS_EXPORT_DIR}/cvmfs/.data - ls -R ${CVMFS_EXPORT_DIR} > ${CI_PROJECT_DIR}/cvmfs_export_dir_content -- GitLab From e24529863e3ceddbc1a709e3a917ee113632b15d Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Fri, 3 Jul 2020 22:55:09 +0200 Subject: [PATCH 39/74] simplify dependency from patatrack-scripts --- .../cms-patatrack/cms-patatrack-bmk.sh | 18 +- cms/patatrack/cms-patatrack/cmssw_config.py | 141 ++++++ .../utility_scripts/benchmark.py | 66 +++ .../cms-patatrack/utility_scripts/cpuinfo.py | 80 +++ .../cms-patatrack/utility_scripts/gpuinfo.py | 58 +++ .../cms-patatrack/utility_scripts/multirun.py | 455 ++++++++++++++++++ .../utility_scripts/plot_scan.py | 116 +++++ .../utility_scripts/set_output_encoding.py | 18 + .../utility_scripts/sourceFromPixelRaw_cff.py | 51 ++ .../cms-patatrack/utility_scripts/threaded.py | 21 + 10 files changed, 1008 insertions(+), 16 deletions(-) create mode 100644 cms/patatrack/cms-patatrack/cmssw_config.py create mode 100755 cms/patatrack/cms-patatrack/utility_scripts/benchmark.py create mode 100755 cms/patatrack/cms-patatrack/utility_scripts/cpuinfo.py create mode 100755 cms/patatrack/cms-patatrack/utility_scripts/gpuinfo.py create mode 100755 cms/patatrack/cms-patatrack/utility_scripts/multirun.py create mode 100755 cms/patatrack/cms-patatrack/utility_scripts/plot_scan.py create mode 100644 cms/patatrack/cms-patatrack/utility_scripts/set_output_encoding.py create mode 100644 
cms/patatrack/cms-patatrack/utility_scripts/sourceFromPixelRaw_cff.py create mode 100644 cms/patatrack/cms-patatrack/utility_scripts/threaded.py diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index 4f03b4a..6366a94 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -24,13 +24,11 @@ function doOne(){ ####################################### # This needs to be fixed - ln -s ${BMKDIR}/patatrack-scripts - ls -l - cd patatrack-scripts + myecho "current dir is `pwd`" myecho "files in `pwd` are" ls -l - ./benchmark profile.py #>>$LOG 2>&1 3>&1 + ${BMKDIR}/utility_scripts/benchmark.py ${BMKDIR}/cmssw_config.py #>>$LOG 2>&1 3>&1 ####################################### status=${?} @@ -61,18 +59,6 @@ function validateInputArguments(){ pushd ${CMSSW_RELEASE}; eval `scramv1 runtime -sh`; popd # Configure WL copy - myecho "current dir is `pwd`" - myecho "files in `pwd` are" - ls -l - - cd ${BMKDIR}/patatrack-scripts - myecho "Moving to `pwd`" - myecho "Preparing configuration files" - ./workflow.sh - patch -b --forward profile.py profile.patch - #change .../patatrack-scripts/sourceFromPixelRaw_cff.py and point "fed_prefix" to .../extraction/path/.../store/opendata/cms - sed -i "s|/data/store/opendata/cms|/bmk/data/store/opendata/cms|g" sourceFromPixelRaw_cff.py - myecho "Configuration file done" return 0 } diff --git a/cms/patatrack/cms-patatrack/cmssw_config.py b/cms/patatrack/cms-patatrack/cmssw_config.py new file mode 100644 index 0000000..4fd5c3b --- /dev/null +++ b/cms/patatrack/cms-patatrack/cmssw_config.py @@ -0,0 +1,141 @@ +# Auto generated configuration file +# using: +# Revision: 1.19 +# Source: /local/reps/CMSSW/CMSSW/Configuration/Applications/python/ConfigBuilder.py,v +# with command line options: profile --data --era Run2_2018 --geometry DB:Extended --conditions 102X_dataRun2_HLT_v2 -s 
RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,DQM:@pixelTrackingOnlyDQM --procModifiers gpu --customise RecoPixelVertexing/Configuration/customizePixelTracksForProfiling.customizePixelTracksForProfilingGPUOnly -n 4200 --nThreads 8 --runUnscheduled --filein file:step2.root --fileout file:step3.root --datatier GEN-SIM-RECO,DQMIO --eventcontent RECOSIM,DQM --python_filename profile.py --no_exec +import FWCore.ParameterSet.Config as cms + +from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 +from Configuration.ProcessModifiers.gpu_cff import gpu + +process = cms.Process('RECO',Run2_2018,gpu) + +# import of standard configurations +process.load('Configuration.StandardSequences.Services_cff') +process.load('SimGeneral.HepPDTESSource.pythiapdt_cfi') +process.load('FWCore.MessageService.MessageLogger_cfi') +process.load('Configuration.EventContent.EventContent_cff') +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') +process.load('Configuration.StandardSequences.RawToDigi_Data_cff') +process.load('Configuration.StandardSequences.Reconstruction_Data_cff') +process.load('DQMServices.Core.DQMStoreNonLegacy_cff') +process.load('DQMOffline.Configuration.DQMOffline_cff') +process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32(4200), + output = cms.optional.untracked.allowed(cms.int32,cms.PSet) +) + +process.options = cms.untracked.PSet( + FailPath = cms.untracked.vstring(), + IgnoreCompletely = cms.untracked.vstring(), + Rethrow = cms.untracked.vstring(), + SkipEvent = cms.untracked.vstring(), + allowUnscheduled = cms.obsolete.untracked.bool, + canDeleteEarly = cms.untracked.vstring(), + emptyRunLumiMode = cms.obsolete.untracked.string, + eventSetup = cms.untracked.PSet( + forceNumberOfConcurrentIOVs = cms.untracked.PSet( + + ), + numberOfConcurrentIOVs 
= cms.untracked.uint32(1) + ), + fileMode = cms.untracked.string('FULLMERGE'), + forceEventSetupCacheClearOnNewRun = cms.untracked.bool(False), + makeTriggerResults = cms.obsolete.untracked.bool, + numberOfConcurrentLuminosityBlocks = cms.untracked.uint32(1), + numberOfConcurrentRuns = cms.untracked.uint32(1), + numberOfStreams = cms.untracked.uint32(0), + numberOfThreads = cms.untracked.uint32(1), + printDependencies = cms.untracked.bool(False), + sizeOfStackForThreadsInKB = cms.optional.untracked.uint32, + throwIfIllegalParameter = cms.untracked.bool(True), + wantSummary = cms.untracked.bool(False) +) + +# Production Info +process.configurationMetadata = cms.untracked.PSet( + annotation = cms.untracked.string('profile nevts:4200'), + name = cms.untracked.string('Applications'), + version = cms.untracked.string('$Revision: 1.19 $') +) + +# Output definition + +process.RECOSIMoutput = cms.OutputModule("PoolOutputModule", + dataset = cms.untracked.PSet( + dataTier = cms.untracked.string('GEN-SIM-RECO'), + filterName = cms.untracked.string('') + ), + fileName = cms.untracked.string('file:step3.root'), + outputCommands = process.RECOSIMEventContent.outputCommands, + splitLevel = cms.untracked.int32(0) +) + +process.DQMoutput = cms.OutputModule("DQMRootOutputModule", + dataset = cms.untracked.PSet( + dataTier = cms.untracked.string('DQMIO'), + filterName = cms.untracked.string('') + ), + fileName = cms.untracked.string('file:step3_inDQM.root'), + outputCommands = process.DQMEventContent.outputCommands, + splitLevel = cms.untracked.int32(0) +) + +# Additional output definition + +# Other statements +from Configuration.AlCa.GlobalTag import GlobalTag +process.GlobalTag = GlobalTag(process.GlobalTag, '102X_upgrade2018_design_v9', '') + +# Path and EndPath definitions +process.raw2digi_step = cms.Path(process.RawToDigi_pixelOnly) +process.reconstruction_step = cms.Path(process.reconstruction_pixelTrackingOnly) +process.dqmoffline_step = 
cms.EndPath(process.DQMOfflinePixelTracking) +process.dqmofflineOnPAT_step = cms.EndPath(process.PostDQMOffline) +process.RECOSIMoutput_step = cms.EndPath(process.RECOSIMoutput) +process.DQMoutput_step = cms.EndPath(process.DQMoutput) + +# Schedule definition +process.schedule = cms.Schedule(process.raw2digi_step,process.reconstruction_step,process.dqmoffline_step,process.dqmofflineOnPAT_step,process.RECOSIMoutput_step,process.DQMoutput_step) +from PhysicsTools.PatAlgos.tools.helpers import associatePatAlgosToolsTask +associatePatAlgosToolsTask(process) + +#Setup FWK for multithreaded +process.options.numberOfThreads=cms.untracked.uint32(8) +process.options.numberOfStreams=cms.untracked.uint32(0) +process.options.numberOfConcurrentLuminosityBlocks=cms.untracked.uint32(1) + +# customisation of the process. + +# Automatic addition of the customisation function from RecoPixelVertexing.Configuration.customizePixelTracksForProfiling +from RecoPixelVertexing.Configuration.customizePixelTracksForProfiling import customizePixelTracksForProfilingGPUOnly + +#call to customisation function customizePixelTracksForProfilingGPUOnly imported from RecoPixelVertexing.Configuration.customizePixelTracksForProfiling +process = customizePixelTracksForProfilingGPUOnly(process) + +# End of customisation functions +#do not add changes to your config after this point (unless you know what you are doing) + + +# Customisation from command line + +#Have logErrorHarvester wait for the same EDProducers to finish as those providing data for the OutputModule +from FWCore.Modules.logErrorHarvester_cff import customiseLogErrorHarvesterUsingOutputCommands +process = customiseLogErrorHarvesterUsingOutputCommands(process) + +# Add early deletion of temporary data products to reduce peak memory need +from Configuration.StandardSequences.earlyDeleteSettings_cff import customiseEarlyDelete +process = customiseEarlyDelete(process) +# End adding early deletion + +# load data using the DAQ source 
+process.load('sourceFromPixelRaw_cff') + +# report CUDAService messages +process.MessageLogger.categories.append("CUDAService") + +# print the summary +process.options.wantSummary = cms.untracked.bool( True ) diff --git a/cms/patatrack/cms-patatrack/utility_scripts/benchmark.py b/cms/patatrack/cms-patatrack/utility_scripts/benchmark.py new file mode 100755 index 0000000..e3347b8 --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/benchmark.py @@ -0,0 +1,66 @@ +#! /usr/bin/env python + +import sys +import os +import copy + +from multirun import * +import FWCore.ParameterSet.Config as cms + + +if __name__ == "__main__": + if not 'CMSSW_BASE' in os.environ: + # FIXME print a meaningful error message + sys.exit(1) + + if len(sys.argv) == 1: + # FIXME print a meaningful error message + sys.exit(1) + + # TODO parse arguments and options from the command line + options = { + 'verbose' : False, + 'plumbing' : False, + 'warmup' : True, + 'events' : 4200, + 'repeats' : 4, + 'jobs' : 1, + 'threads' : 8, # per job + 'streams' : 8, # per job + 'gpus_per_job' : 1, # per job + 'allow_hyperthreading': False, # this has no effect if set_cpu_affinity is False + 'set_cpu_affinity' : True, + 'set_gpu_affinity' : True, + 'logdir' : None, # relative or absolute path, or None to disable storing the logs + 'keep' : [], # output files to be kept + } + + +#### FIXME: Not clear if for GPU benchmarking purposes we need to +#### FIXME: run before io benchmark. 
Skipping for the time being +#### FIXME: setting flag to False + run_io_benchmark = False + #run_io_benchmark = True + + info() + + for config in sys.argv[1:]: + process = parseProcess(config) + + if run_io_benchmark: + print 'Benchmarking only I/O' + io = copy.deepcopy(process) + io.hltGetRaw = cms.EDAnalyzer("HLTGetRaw", RawDataCollection = cms.InputTag("rawDataCollector")) + io.path = cms.Path(io.hltGetRaw) + io.schedule = cms.Schedule(io.path) + if 'PrescaleService' in io.__dict__: + del io.PrescaleService + io_options = copy.deepcopy(options) + io_options['logdir'] = None + io_options['keep'] = [] + multiCmsRun(io, **io_options) + run_io_benchmark = False + print + + print 'Benchmarking %s' % config + multiCmsRun(process, **options) diff --git a/cms/patatrack/cms-patatrack/utility_scripts/cpuinfo.py b/cms/patatrack/cms-patatrack/utility_scripts/cpuinfo.py new file mode 100755 index 0000000..77c16ec --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/cpuinfo.py @@ -0,0 +1,80 @@ +#! 
/usr/bin/env python + +import sys +import subprocess +import re +import collections + + +class CPUInfo(object): + def __init__(self, socket = None, model = None): + self.socket = socket + self.model = model + self.cores = {} + self.hardware_threads = [] + self.physical_processors = [] + + def add_core(self, core, thread): + if core in self.cores: + self.cores[core].append(thread) + else: + self.cores[core] = [ thread ] + + def finalise(self): + for core in self.cores.values(): + self.physical_processors.append(core[0]) + self.hardware_threads.extend(core) + self.physical_processors.sort() + self.hardware_threads.sort() + + +# cache results across calls +__cache = None + + +# return a mapping between sockets and CPUInfo objects +def get_cpu_info(cache = True): + global __cache + if cache and __cache: + return __cache + + cpus = collections.OrderedDict() + + model = 'Unknown CPU' + description = subprocess.Popen(['lscpu', ], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0] + for line in description.splitlines(): + if 'Model name:' in line: + model = line.split(':')[1].strip() + break + + devices = subprocess.Popen(['lscpu', '-b', '-p=SOCKET,NODE,CORE,CPU'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0] + for line in devices.splitlines(): + if '#' in line: + continue + + sock, numa, core, proc = line.split(',') + sock = int(sock) if sock else 0 + numa = int(numa) if numa else sock # currently unused + core = int(core) if core else 0 + proc = int(proc) if proc else 0 + + if not sock in cpus: + cpus[sock] = CPUInfo(sock, model) + cpus[sock].add_core(core, proc) + + for cpu in cpus.values(): + cpu.finalise() + + if cache: + __cache = cpus + + return cpus + + +if __name__ == "__main__": + cpus = get_cpu_info() + print '%d CPUs:' % len(cpus) + for cpu in cpus.values(): + print ' %d: %s (%d cores, %d threads)' % (cpu.socket, cpu.model, len(cpu.physical_processors), len(cpu.hardware_threads)) + print ' cores: %s' % ', '.join(map(str, 
cpu.physical_processors)) + print ' HT\'s: %s' % ', '.join(map(str, cpu.hardware_threads)) diff --git a/cms/patatrack/cms-patatrack/utility_scripts/gpuinfo.py b/cms/patatrack/cms-patatrack/utility_scripts/gpuinfo.py new file mode 100755 index 0000000..7abd837 --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/gpuinfo.py @@ -0,0 +1,58 @@ +#! /usr/bin/env python + +import sys +import os +import subprocess +import re +import collections + + +class GPUInfo(object): + def __init__(self, device = None, model = None): + self.device = device + self.model = model + + +# cache results across calls +__cache = None + + +# return a mapping between devices and GPUInfo objects +def get_gpu_info(cache = True): + global __cache + if cache and __cache: + return __cache + + gpus = collections.OrderedDict() + + visible = None + if 'CUDA_VISIBLE_DEVICES' in os.environ: + if os.environ['CUDA_VISIBLE_DEVICES'] == '': + visible = [] + else: + visible = [int(device) for device in os.environ['CUDA_VISIBLE_DEVICES'].split(',')] + + devices = subprocess.Popen(['cudaComputeCapabilities', ], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0] + for line in devices.splitlines(): + matches = re.match(r' *([0-9]+) +([0-9]+\.[0-9]) +(.*)', line) + if matches: + device = int(matches.group(1)) + if visible: + device = visible[device] + model = matches.group(3).strip() + gpus[device] = GPUInfo(device, model) + + if cache: + __cache = gpus + + return gpus + + +if __name__ == "__main__": + gpus = get_gpu_info() + if gpus: + print '%d visible NVIDIA GPUs:' % len(gpus) + for gpu in gpus.values(): + print ' %d: %s' % (gpu.device, gpu.model) + else: + print 'No visible NVIDIA GPUs' diff --git a/cms/patatrack/cms-patatrack/utility_scripts/multirun.py b/cms/patatrack/cms-patatrack/utility_scripts/multirun.py new file mode 100755 index 0000000..d1281a5 --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/multirun.py @@ -0,0 +1,455 @@ +#! 
/usr/bin/env python + +import sys +import os +import copy +import imp +import itertools +import math +import shutil +import subprocess +import tempfile +from collections import defaultdict +from datetime import datetime +import numpy as np +from scipy import stats + +# FIXME check that CMSSW_BASE is set +import FWCore.ParameterSet.Config as cms + +# set the output encoding to UTF-8 for pipes and redirects +from set_output_encoding import * +set_output_encoding(encoding='utf-8', force=True) + +from cpuinfo import * +from gpuinfo import * +from threaded import threaded + +cpus = get_cpu_info() +gpus = get_gpu_info() + +epoch = datetime.now() + +@threaded +def singleCmsRun(filename, workdir, logdir = None, keep = [], verbose = False, cpus = None, gpus = None, *args): + # optionally set CPU affinity + command = ('cmsRun', filename) + args + if cpus is not None: + command = ('taskset', '-c', cpus) + command + cmdline = ' '.join(command) + + # optionally set GPU affinity + environment = os.environ.copy() + if gpus is not None: + environment['CUDA_VISIBLE_DEVICES'] = gpus + cmdline = 'CUDA_VISIBLE_DEVICES=' + gpus + ' ' + cmdline + + if verbose: + print cmdline + + # run a cmsRun job, redirecting standard output and error to files + lognames = ('stdout', 'stderr') + logfiles = tuple('%s/%s' % (workdir, name) for name in ('stdout', 'stderr')) + stdout = open(logfiles[0], 'w') + stderr = open(logfiles[1], 'w') + job = subprocess.Popen(command, cwd = workdir, env = environment, stdout = stdout, stderr = stderr) + job.communicate() + stdout.close() + stderr.close() + + # if requested, move the logs and any additional artifacts to the log directory + if logdir: + for name in list(keep) + list(lognames): + if os.path.isfile(workdir + '/' + name): + shutil.move(workdir + '/' + name, '%s/cmsRun%06d.%s' % (logdir, job.pid, name)) + logfiles = tuple('%s/cmsRun%06d.%s' % (logdir, job.pid, name) for name in lognames) + + stderr = open(logfiles[1], 'r') + + if (job.returncode < 0): + 
print "The underlying cmsRun job was killed by signal %d" % -job.returncode + print + print "The last lines of the error log are:" + print "".join(stderr.readlines()[-10:]) + print + print "See %s and %s for the full logs" % logfiles + stderr.close() + return None + + elif (job.returncode > 0): + print "The underlying cmsRun job failed with return code %d" % job.returncode + print + print "The last lines of the error log are:" + print "".join(stderr.readlines()[-10:]) + print + print "See %s and %s for the full logs" % logfiles + stderr.close() + return None + + if verbose: + print "The underlying cmsRun job completed successfully" + + # analyse the output + date_format = '%d-%b-%Y %H:%M:%S.%f' + # expected format + # 100, 18-Mar-2020 12:16:39.172836 CET + begin_pattern = re.compile(r'%MSG-i ThroughputService: *AfterModEndJob') + line_pattern = re.compile(r' *(\d+), (\d+-...-\d\d\d\d \d\d:\d\d:\d\d.\d\d\d\d\d\d) .*') + + events = [] + times = [] + matching = False + for line in stderr: + # look for the begin marker + if not matching: + if begin_pattern.match(line): + matching = True + continue + + matches = line_pattern.match(line) + # check for the end of the events list + if not matches: + break + + # read the matching lines + event = int(matches.group(1)) + time = datetime.strptime(matches.group(2), date_format) + events.append(event) + times.append((time - epoch).total_seconds()) + + stderr.close() + return (tuple(events), tuple(times)) + + +def parseProcess(filename): + # parse the given configuration file and return the `process` object it define + # the import logic is taken from edmConfigDump + try: + handle = open(filename, 'r') + except: + print "Failed to open %s: %s" % (filename, sys.exc_info()[1]) + sys.exit(1) + + # make the behaviour consistent with 'cmsRun file.py' + sys.path.append(os.getcwd()) + try: + pycfg = imp.load_source('pycfg', filename, handle) + process = pycfg.process + except: + print "Failed to parse %s: %s" % (filename, 
sys.exc_info()[1]) + sys.exit(1) + + handle.close() + return process + + +def multiCmsRun( + process, # the cms.Process object to run + data = None, # a file-like object for storing performance measurements + header = True, # write a header before the measurements + warmup = True, # whether to run an extra warm-up job + logdir = None, # a relative or absolute path where to store individual jobs' log files, or None + keep = [], # additional output files to be kept + verbose = False, # whether to print extra messages + plumbing = False, # print output in a machine-readable format + events = -1, # number of events to process (default: unlimited) + repeats = 1, # number of times to repeat each job (default: 1) + jobs = 1, # number of jobs to run in parallel (default: 1) + threads = 1, # number of CPU threads per job (default: 1) + streams = 1, # number of EDM streams per job (default: 1) + gpus_per_job = 1, # number of GPUs per job (default: 1) + allow_hyperthreading = True, # whether to use extra CPU cores from HyperThreading + set_cpu_affinity = False, # whether to set CPU affinity + set_gpu_affinity = False, # whether yo set GPU affinity + *args): # additional arguments passed to cmsRun + # set the number of streams and threads + process.options.numberOfThreads = cms.untracked.uint32( threads ) + process.options.numberOfStreams = cms.untracked.uint32( streams ) + + # set the number of events to process + process.maxEvents.input = cms.untracked.int32( events ) + + # print a message every 100 events + if not 'ThroughputService' in process.__dict__: + process.ThroughputService = cms.Service('ThroughputService', + enableDQM = cms.untracked.bool(False), + ) + process.ThroughputService.printEventSummary = cms.untracked.bool(True) + process.ThroughputService.eventResolution = cms.untracked.uint32(100) + if events > -1: + process.ThroughputService.eventRange = cms.untracked.uint32(events) + + if not 'MessageLogger' in process.__dict__: + 
process.load('FWCore.MessageService.MessageLogger_cfi') + if not 'ThroughputService' in process.MessageLogger.categories: + process.MessageLogger.categories.append('ThroughputService') + process.MessageLogger.cerr.ThroughputService = cms.untracked.PSet( + limit = cms.untracked.int32(10000000), + reportEvery = cms.untracked.int32(1) + ) + + # make a full dump of the configuration, to make changes to the number of threads, streams, etc. + workdir = tempfile.mkdtemp(prefix = 'cmsRun') + config = open(os.path.join(workdir, 'process.py'), 'w') + config.write(process.dumpPython()) + config.close() + + cpu_assignment = [ None ] * jobs + if set_cpu_affinity: + # build the list of CPUs for each job: + # - build a list of all "processors", grouped by sockets, cores and hardware threads, e.g. + # [ 0,2,4,6,8,10,12,14,16,18,20,22,24,26,1,3,5,7,9,11,13,15,17,19,21,23,25,27 ] + # - split the list by the number of jobs; if the number of jobs is a multiple of the number of sockets + # the jobs should automatically be split on socket boundaries + # - otherwise some jobs may span multiple sockets, e.g. 
+ # [ 0,2,4,6 ], [ 8,10,12,14 ], [ 16,18,20,22 ], [ 24,26,1,3 ], [ 5,7,9,11 ], [ 13,15,17,19 ], [ 21,23,25,27 ] + # TODO: set the processor assignment as an argument, to support arbitrary splitting + if allow_hyperthreading: + cpu_list = list(itertools.chain(*(map(str, cpu.hardware_threads) for cpu in cpus.values()))) + else: + cpu_list = list(itertools.chain(*(map(str, cpu.physical_processors) for cpu in cpus.values()))) + + # if all the jobs fit within individual sockets, assing jobs to sockets in a round-robin + if len(cpu_list) // len(cpus) // threads * len(cpus) >= jobs: + cpu_assignment = [ list() for i in range(jobs) ] + if allow_hyperthreading: + available_cpus = [ copy.copy(cpu.hardware_threads) for cpu in cpus.values() ] + else: + available_cpus = [ copy.copy(cpu.physical_processors) for cpu in cpus.values() ] + for job in range(jobs): + socket = job % len(cpus) + cpu_assignment[job] = ','.join(map(str, available_cpus[socket][0:threads])) + del available_cpus[socket][0:threads] + + # otherwise, split the list by the number of jobs, and possibly overcommit + else: + if len(cpu_list) >= jobs * threads: + # split the list by the number of jobs + index = [ i * threads for i in range(jobs+1) ] + else: + # fill all cpus and overcommit + index = [ i * len(cpu_list) // jobs for i in range(jobs+1) ] + + cpu_assignment = [ ','.join(cpu_list[index[i]:index[i+1]]) for i in range(jobs) ] + + gpu_assignment = [ None ] * jobs + if set_gpu_affinity: + # build the list of GPUs for each job: + # - if the number of GPUs per job is greater than or equal to the number of GPUs in the system, + # run each job on all GPUs + # - otherwise, assign GPUs to jobs in a round-robin fashon + # TODO: set the GPU assignment as an argument, to support arbitrary splitting + if gpus_per_job >= len(gpus): + gpu_assignment = [ ','.join(map(str, gpus.keys())) for i in range(jobs) ] + else: + gpu_repeated = map(str, itertools.islice(itertools.cycle(gpus.keys()), jobs * gpus_per_job)) + 
gpu_assignment = [ ','.join(gpu_repeated[i*gpus_per_job:(i+1)*gpus_per_job]) for i in range(jobs) ] + + if warmup: + # warm up to cache the binaries, data and conditions + jobdir = os.path.join(workdir, "warmup") + os.mkdir(jobdir) + # recreate logs' directory + if logdir is not None: + thislogdir = logdir + '/warmup' + shutil.rmtree(thislogdir, True) + os.makedirs(thislogdir) + else: + thislogdir = None + print 'Warming up' + thread = singleCmsRun(config.name, jobdir, thislogdir, [], verbose, cpu_assignment[0], gpu_assignment[0], *args) + thread.start() + thread.join() + shutil.rmtree(jobdir) + print + + if repeats > 1: + n_times = '%d times' % repeats + elif repeats == 1: + n_times = 'once' + else: + n_times = 'indefinitely' + + if events >= 0: + n_events = str(events) + else: + n_events = 'all' + + print 'Running %s over %s events with %d jobs, each with %d threads, %d streams and %d GPUs' % (n_times, n_events, jobs, threads, streams, gpus_per_job) + + # store the values to compute the average throughput over the repetitions + failed = [ False ] * repeats + if repeats > 1 and not plumbing: + throughputs = [ None ] * repeats + overlaps = [ None ] * repeats + + # store performance points for later analysis + if data and header: + data.write('%s, %s, %s, %s, %s, %s, %s, %s\n' % ('jobs', 'overlap', 'CPU threads per job', 'EDM streams per job', 'GPUs per jobs', 'number of events', 'average throughput (ev/s)', 'uncertainty (ev/s)')) + + iterations = xrange(repeats) if repeats > 0 else itertools.count() + for repeat in iterations: + # run the jobs reading the output to extract the event throughput + events = [ None ] * jobs + times = [ None ] * jobs + fits = [ None ] * jobs + job_threads = [ None ] * jobs + # recreate logs' directory + if logdir is not None: + thislogdir = logdir + '/step%04d' % repeat + shutil.rmtree(thislogdir, True) + os.makedirs(thislogdir) + else: + thislogdir = None + # create work threads + for job in range(jobs): + jobdir = 
os.path.join(workdir, "step%02d_part%02d" % (repeat, job)) + os.mkdir(jobdir) + job_threads[job] = singleCmsRun(config.name, jobdir, thislogdir, keep, verbose, cpu_assignment[job], gpu_assignment[job], *args) + + # start all threads + for thread in job_threads: + thread.start() + + # join all threads + failed_jobs = [ False ] * jobs + consistent_events = defaultdict(int) + for job, thread in enumerate(job_threads): + # implicitly wait for the thread to complete + result = thread.result.get() + if result is None or not(all(result)): + failed_jobs[job] = True + continue + (e, t) = result + consistent_events[tuple(e)] += 1 + events[job] = np.array(e) + times[job] = np.array(t) + print('job %s , thread %s' % (job, thread)) + print ('events %s' %events[job]) + print ('times %s' %times[job]) + fits[job] = stats.linregress(times[job], events[job]) + print ('fits %s' %fits[job].slope) + + # if any jobs failed, skip the whole measurement + if any(failed_jobs): + print '%d %s failed, this measurement will be ignored' % (sum(failed_jobs), 'jobs' if sum(failed_jobs) > 1 else 'job') + failed[repeat] = True + continue + + # if all jobs were successful, delete the temporary directories + for job in range(jobs): + jobdir = os.path.join(workdir, "step%02d_part%02d" % (repeat, job)) + shutil.rmtree(jobdir) + + reference_events = np.array(sorted(consistent_events, key = consistent_events.get, reverse = True)[0]) + + # check for jobs with inconsistent events + inconsistent = [ False ] * jobs + for job in range(jobs): + if (len(events[job]) != len(reference_events)) or any(events[job] != reference_events): + print 'Inconsistent measurement points for job %d, will be skipped' % job + inconsistent[job] = True + + # delete data from inconsistent jobs + for job in range(jobs-1, -1, -1): + if inconsistent[job]: + del times[job] + del fits[job] + del inconsistent[job] + jobs -= 1 + + # measure the average throughput + used_events = reference_events[-1] - reference_events[0] + print('fit 
slope: %s' % [fit.slope for fit in fits]) + + throughput = sum(fit.slope for fit in fits) + error = math.sqrt(sum(fit.stderr * fit.stderr for fit in fits)) + if jobs > 1: + # if running more than on job in parallel, estimate and print the overlap among them + overlap = (min(t[-1] for t in times) - max(t[0] for t in times)) / sum(t[-1] - t[0] for t in times) * len(times) + if overlap < 0.: + overlap = 0. + # machine- or human-readable formatting + formatting = '%8.1f\t%8.1f\t%d\t%0.1f%%' if plumbing else u'%8.1f \u00b1 %5.1f ev/s (%d events, %0.1f%% overlap)' + print formatting % (throughput, error, used_events, overlap * 100.) + else: + overlap = 1. + # machine- or human-readable formatting + formatting = '%8.1f\t%8.1f\t%d' if plumbing else u'%8.1f \u00b1 %5.1f ev/s (%d events)' + print formatting % (throughput, error, used_events) + + # store the values to compute the average throughput over the repetitions + if repeats > 1 and not plumbing: + throughputs[repeat] = throughput + overlaps[repeat] = overlap + + # store performance points for later analysis + if data: + data.write('%d, %f, %d, %d, %d, %d, %f, %f\n' % (jobs, overlap, threads, streams, gpus_per_job, used_events, throughput, error)) + + + # compute the average throughput over the repetitions + if repeats > 1 and not plumbing: + # filter out the jobs with an overlap lower than 95% + values = [ throughputs[i] for i in range(repeats) if overlaps[i] >= 0.95 ] + n = len(values) + if n > 0: + value = np.average(values) + error = np.std(values, ddof=1) + else: + # no jobs with an overlap > 95%, use the "best" one + value = throughputs[overlaps.index(max(overlaps))] + error = float('nan') + print ' --------------------' + if n == repeats: + formatting = u'%8.1f \u00b1 %5.1f ev/s' + print formatting % (value, error) + elif n > 0: + formatting = u'%8.1f \u00b1 %5.1f ev/s (based on %d measurements)' + print formatting % (value, error, n) + else: + formatting = u'%8.1f (single measurement with the highest overlap)' 
+ print formatting % (value, ) + print + + # delete the temporary work dir + shutil.rmtree(workdir) + + +def info(): + print '%d CPUs:' % len(cpus) + for cpu in cpus.values(): + print ' %d: %s (%d cores, %d threads)' % (cpu.socket, cpu.model, len(cpu.physical_processors), len(cpu.hardware_threads)) + print + + print '%d visible NVIDIA GPUs:' % len(gpus) + for gpu in gpus.values(): + print ' %d: %s' % (gpu.device, gpu.model) + print + + +if __name__ == "__main__": + options = { + 'verbose' : False, + 'plumbing' : False, + 'warmup' : True, + 'events' : 4200, + 'repeats' : 4, + 'jobs' : 2, + 'threads' :16, # per job + 'streams' : 8, # per job + 'gpus_per_job' : 2, # per job + 'allow_hyperthreading': True, + 'set_cpu_affinity' : True, + 'set_gpu_affinity' : True, + } + + # TODO parse arguments and options from the command line + + if options['verbose']: + info() + + if len(sys.argv) > 1: + process = parseProcess(sys.argv[1]) + multiCmsRun(process, **options) + diff --git a/cms/patatrack/cms-patatrack/utility_scripts/plot_scan.py b/cms/patatrack/cms-patatrack/utility_scripts/plot_scan.py new file mode 100755 index 0000000..183ce62 --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/plot_scan.py @@ -0,0 +1,116 @@ +#! 
/usr/bin/env python + +import sys +import os.path + +import numpy as np +import pandas as pd +import matplotlib as mpl +mpl.use('agg') +import seaborn as sns + +# plot content options +options = { + 'normalise': False, # True: plot the average throughput per job, False: plot the total throughput + 'x axis': 'EDM streams', # 'CPU threads per job', 'CPU threads', 'EDM streams per job', 'EDM streams' +} + +# workaround for seaborn 0.9.0 +def fix_plot_range(plot, zoom = False): + data = plot.data[plot._x_var] + xmin = min(data) + xmax = max(data) + step = (xmax - xmin) * 0.05 + plot.set(xlim=(xmin - step, xmax + step)) + if not zoom: + plot.set(ylim=(0, None)) + + +sns.set(style={ # based on 'whitegrid' + 'axes.axisbelow': True, + 'axes.edgecolor': '.15', # .8 + 'axes.facecolor': 'white', + 'axes.grid': True, + 'axes.labelcolor': '.15', + 'axes.linewidth': 1, + 'figure.facecolor': 'white', + 'font.family': ['sans-serif'], + 'font.sans-serif': ['Arial', 'DejaVu Sans', 'Liberation Sans', 'Bitstream Vera Sans', 'sans-serif'], + 'grid.color': '.8', + 'grid.linestyle': '-', + 'image.cmap': 'rocket', + 'legend.frameon': False, + 'legend.numpoints': 1, + 'legend.scatterpoints': 1, + 'lines.solid_capstyle': 'round', + 'text.color': '.15', + 'xtick.color': '.15', + 'xtick.direction': 'out', + 'xtick.major.size': 0, + 'xtick.minor.size': 0, + 'ytick.color': '.15', + 'ytick.direction': 'out', + 'ytick.major.size': 0, + 'ytick.minor.size': 0, +}) + +sns.set_palette([ + (0., 0., 1.), # ROOT kBlue + (1., 0., 0.), # ROOT kRed + (0., 0., 0.), # ROOT kBlack + (1., 0.4, 0.), # ROOT kOrange +7 + (0.8, 0.2, 0.8), # ROOT kMagenta -3 +], 5) + +data = [] + +for filename in sys.argv[1:]: + # expected file format: + # jobs, overlap, CPU threads per job, EDM streams per job, GPUs per jobs, number of events, average throughput (ev/s), uncertainty (ev/s) + # 2, 0.994863, 6, 6, 1, 4000, 3591.314398, 1.665309 + # ... 
+ values = pd.read_csv(filename).rename(columns=lambda x: x.strip()) + + # if the data does not have a name, build it from the file name + if not 'name' in values: + name = os.path.basename(filename) + if '.' in name: + i = name.rindex('.') + name = name[:i] + values.insert(0, 'name', [ name ] * len(values), True) + data.append(values) + +df = pd.concat(data, ignore_index = True) +del data + +# normalise to the number of jobs +if options['normalise']: + df['average throughput (ev/s)'] /= df['jobs'] + df['uncertainty (ev/s)'] /= df['jobs'] + +# compute the total number of CPU threads and EDM streams +df['CPU threads'] = df['CPU threads per job'] * df['jobs'] +df['EDM streams'] = df['EDM streams per job'] * df['jobs'] + +plot = sns.lmplot( + data = df, + x = options['x axis'], + y = 'average throughput (ev/s)', + fit_reg = True, # estimate and plot a regression model + order = 4, # polynomial fit + hue = 'name', # different categories + height = 5.4, # plot height in inches, at 100 dpi + aspect = 16./9., # plot aspect ratio + legend = True, + legend_out = True, # show the legend to the right of the plot + truncate = False, + ci = 95., + ) + +# zoomed-in version of the plot +fix_plot_range(plot, zoom = True) # workaround for seaborn 0.9.0 +plot.savefig('zoom.png') + +# full Y axis +fix_plot_range(plot) # workaround for seaborn 0.9.0 +plot.savefig('plot.png') diff --git a/cms/patatrack/cms-patatrack/utility_scripts/set_output_encoding.py b/cms/patatrack/cms-patatrack/utility_scripts/set_output_encoding.py new file mode 100644 index 0000000..fa5c837 --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/set_output_encoding.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- + +# see https://stackoverflow.com/a/19700891/2050986 + +def set_output_encoding(encoding='utf-8', force=False): + import sys + import codecs + '''When piping to the terminal, python knows the encoding needed, and + sets it automatically. 
But when piping to another program (for example, + | less), python can not check the output encoding. In that case, it + is None. What I am doing here is to catch this situation for both + stdout and stderr and force the encoding''' + current = sys.stdout.encoding + if current is None or force: + sys.stdout = codecs.getwriter(encoding)(sys.stdout) + current = sys.stderr.encoding + if current is None or force: + sys.stderr = codecs.getwriter(encoding)(sys.stderr) diff --git a/cms/patatrack/cms-patatrack/utility_scripts/sourceFromPixelRaw_cff.py b/cms/patatrack/cms-patatrack/utility_scripts/sourceFromPixelRaw_cff.py new file mode 100644 index 0000000..d2c5f70 --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/sourceFromPixelRaw_cff.py @@ -0,0 +1,51 @@ +import FWCore.ParameterSet.Config as cms + +import glob +fed_prefix = '/bmk/data/store/opendata/cms' +fed_path = 'MonteCarloUpgrade/RunIIAutumn18DR/TTToHadronic_TuneCP5_13TeV-powheg-pythia8/run000001' +fed_basedir = fed_prefix + '/' + fed_path +fed_files = glob.glob(fed_basedir + '/*.raw') + +# input +FastMonitoringService = cms.Service( "FastMonitoringService", + filePerFwkStream = cms.untracked.bool( False ), + fastMonIntervals = cms.untracked.uint32( 2 ), + sleepTime = cms.untracked.int32( 1 ) +) + +EvFDaqDirector = cms.Service( "EvFDaqDirector", + runNumber = cms.untracked.uint32( 1 ), + + baseDir = cms.untracked.string( "tmp" ), + buBaseDir = cms.untracked.string( "tmp" ), + + useFileBroker = cms.untracked.bool( False ), + fileBrokerKeepAlive = cms.untracked.bool( True ), + fileBrokerPort = cms.untracked.string( "8080" ), + fileBrokerUseLocalLock = cms.untracked.bool( True ), + fuLockPollInterval = cms.untracked.uint32( 2000 ), + + requireTransfersPSet = cms.untracked.bool( False ), + selectedTransferMode = cms.untracked.string( "" ), + mergingPset = cms.untracked.string( "" ), + + outputAdler32Recheck = cms.untracked.bool( False ), +) + +source = cms.Source( "FedRawDataInputSource", + runNumber = 
cms.untracked.uint32( 1 ), + getLSFromFilename = cms.untracked.bool(True), + testModeNoBuilderUnit = cms.untracked.bool(False), + verifyAdler32 = cms.untracked.bool( True ), + verifyChecksum = cms.untracked.bool( True ), + alwaysStartFromfirstLS = cms.untracked.uint32( 0 ), + + useL1EventID = cms.untracked.bool( True ), # True for MC, True/False for data + eventChunkBlock = cms.untracked.uint32( 240 ), # 32 + eventChunkSize = cms.untracked.uint32( 240), # 32 + maxBufferedFiles = cms.untracked.uint32( 8 ), # 2 + numBuffers = cms.untracked.uint32( 8 ), # 2 + + fileListMode = cms.untracked.bool( True ), # False + fileNames = cms.untracked.vstring(*fed_files), +) diff --git a/cms/patatrack/cms-patatrack/utility_scripts/threaded.py b/cms/patatrack/cms-patatrack/utility_scripts/threaded.py new file mode 100644 index 0000000..8418aed --- /dev/null +++ b/cms/patatrack/cms-patatrack/utility_scripts/threaded.py @@ -0,0 +1,21 @@ +# see https://stackoverflow.com/questions/6893968/how-to-get-the-return-value-from-a-thread-in-python/14331755#14331755 + +def threaded(f, daemon=False): + import threading + import Queue + + def wrapper(q, *args, **kwargs): + '''this function calls the decorated function and puts the result in a queue''' + ret = f(*args, **kwargs) + q.put(ret) + + def wrap(*args, **kwargs): + '''this is the function returned from the decorator. 
It fires off wrapper + in a new thread and returns the thread object with the result queue attached''' + q = Queue.Queue() + t = threading.Thread(target=wrapper, args = (q,) + args, kwargs = kwargs) + t.daemon = daemon + t.result = q + return t + + return wrap -- GitLab From 79e1e6384bae267e93e7291bb307862fb60e33cd Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Fri, 3 Jul 2020 22:56:56 +0200 Subject: [PATCH 40/74] no need to dinamically patch patatrack-scripts --- cms/patatrack/nvidia.Dockerfile.2 | 2 -- 1 file changed, 2 deletions(-) diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/nvidia.Dockerfile.2 index b863b55..049a12d 100644 --- a/cms/patatrack/nvidia.Dockerfile.2 +++ b/cms/patatrack/nvidia.Dockerfile.2 @@ -25,8 +25,6 @@ RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | c # This may also be cacheable in most cases except when /bmk contents change COPY ./cms-patatrack /bmk/./cms-patatrack -RUN /bmk/./cms-patatrack/prepare-patch.sh - #COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh # FIXME RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi -- GitLab From da1baa0288212c153687655e5f2deff1ea14e7d9 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 00:22:25 +0200 Subject: [PATCH 41/74] remove interim container --- cms/cms-patatrack-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 581350f..9f0850a 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -114,6 +114,7 @@ job_snapshot_cvmfs: - ls -R ${CVMFS_EXPORT_DIR} > ${CI_PROJECT_DIR}/cvmfs_export_dir_content after_script: - docker rm -f cvmfs_${CI_JOB_ID} + - docker rm -f patatrack_container only: variables: - $CI_COMMIT_BRANCH =~ /^qa.*$/ -- GitLab From db822a71bc5e9f934f798af59231ae8eb5078423 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 01:31:33 +0200 Subject: [PATCH 42/74] fix cp --- cms/cms-patatrack-ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 9f0850a..ee24689 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -106,9 +106,9 @@ job_snapshot_cvmfs: # run shrinkwrapper - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} # FIXME this is a dirty patch needed to make scipy running. cvmfs shrinkwrapper alone does not copy all files of that dir. 
To be investigated why - - ls -lR ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 + - ls -lR ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 > ${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content - rm -fr ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages - - docker cp /cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 + - docker cp patatrack_container:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 # remove duplicated data - rm -rf ${CVMFS_EXPORT_DIR}/cvmfs/.data - ls -R ${CVMFS_EXPORT_DIR} > ${CI_PROJECT_DIR}/cvmfs_export_dir_content @@ -127,6 +127,7 @@ job_snapshot_cvmfs: paths: - ${CI_PROJECT_DIR}/traces - ${CI_PROJECT_DIR}/cvmfs_export_dir_content + - ${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content - ${CI_PROJECT_DIR}/cms/patatrack/cvmfs expire_in: 1 week when: always -- GitLab From b15c84e954eba4ad156da23eccc3dc0608bc1c2d Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 10:26:05 +0200 Subject: [PATCH 43/74] mv ci scripts ina dedicated folder --- cms/patatrack/{ => ci-scripts}/nvidia.Dockerfile.0 | 0 cms/patatrack/{ => ci-scripts}/nvidia.Dockerfile.1 | 0 cms/patatrack/{ => ci-scripts}/nvidia.Dockerfile.2 | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename cms/patatrack/{ => ci-scripts}/nvidia.Dockerfile.0 (100%) rename cms/patatrack/{ => ci-scripts}/nvidia.Dockerfile.1 (100%) rename cms/patatrack/{ => ci-scripts}/nvidia.Dockerfile.2 (100%) diff --git a/cms/patatrack/nvidia.Dockerfile.0 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 similarity 
index 100% rename from cms/patatrack/nvidia.Dockerfile.0 rename to cms/patatrack/ci-scripts/nvidia.Dockerfile.0 diff --git a/cms/patatrack/nvidia.Dockerfile.1 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 similarity index 100% rename from cms/patatrack/nvidia.Dockerfile.1 rename to cms/patatrack/ci-scripts/nvidia.Dockerfile.1 diff --git a/cms/patatrack/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 similarity index 100% rename from cms/patatrack/nvidia.Dockerfile.2 rename to cms/patatrack/ci-scripts/nvidia.Dockerfile.2 -- GitLab From 44b6889d76224c1db75467670bbffdd1b9e54ce8 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 10:42:38 +0200 Subject: [PATCH 44/74] mv CI scripts in from yml to files --- cms/cms-patatrack-ci.yml | 82 +++++++------------ cms/patatrack/ci-scripts/snapshot_cvmfs.sh | 37 +++++++++ .../ci-scripts/test_standalone_image.sh | 13 +++ 3 files changed, 81 insertions(+), 51 deletions(-) create mode 100644 cms/patatrack/ci-scripts/snapshot_cvmfs.sh create mode 100644 cms/patatrack/ci-scripts/test_standalone_image.sh diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index ee24689..d86f085 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -43,42 +43,42 @@ stages: job_build_image_step0: stage: build_0 before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.0 + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - export IMAGE_NAME=cms/cms-patatrack-nvidia-0 - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} <<: *template_build_image only: changes: - - cms/patatrack/nvidia.Dockerfile.0 + - cms/patatrack/ci-scripts/nvidia.Dockerfile.0 job_build_image_step1: stage: build_1 before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.1 + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 - 
export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - export IMAGE_NAME=cms/cms-patatrack-nvidia-1 - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} <<: *template_build_image only: changes: - - cms/patatrack/nvidia.Dockerfile.0 - - cms/patatrack/nvidia.Dockerfile.1 + - cms/patatrack/ci-scripts/nvidia.Dockerfile.0 + - cms/patatrack/ci-scripts/nvidia.Dockerfile.1 job_build_image_step2: stage: build_2 before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.2 + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - export IMAGE_NAME=cms/cms-patatrack-nvidia-2 - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} <<: *template_build_image only: changes: - - cms/patatrack/* + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/*/* + - cms/patatrack/cms-patatrack/utility_scripts/* job_snapshot_cvmfs: stage: snapshot @@ -87,42 +87,23 @@ job_snapshot_cvmfs: image: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa before_script: - - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} - - export CVMFS_EXPORT_DIR=${CI_PROJECT_DIR}/cms/patatrack - - export CIENV_CVMFSREPO=cms.cern.ch - - export CVMFS_IMAGE=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - - docker pull ${CVMFS_IMAGE} - - docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CVMFS_EXPORT_DIR}:${CVMFS_EXPORT_DIR} -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r ${CIENV_CVMFSREPO} -t /tmp/traces - script: - - sleep 1m # to give time to cvmfs to start - - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} - - export CVMFS_EXPORT_DIR=${CI_PROJECT_DIR}/cms/patatrack - - echo "CVMFS_EXPORT_DIR is $CVMFS_EXPORT_DIR" - # check cvmfs is running - - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe - # Here comes the dry run of the CMS 
Patatrack container. Arguments are for the time being defaults/hardcoded FIXME - - docker pull gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa - - docker run --name patatrack_container --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -e 100 -t 8 -c 1 - # run shrinkwrapper - - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} - # FIXME this is a dirty patch needed to make scipy running. cvmfs shrinkwrapper alone does not copy all files of that dir. To be investigated why - - ls -lR ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 > ${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content - - rm -fr ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages - - docker cp patatrack_container:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 - # remove duplicated data - - rm -rf ${CVMFS_EXPORT_DIR}/cvmfs/.data - - ls -R ${CVMFS_EXPORT_DIR} > ${CI_PROJECT_DIR}/cvmfs_export_dir_content + - source ci-scripts/snapshot_cvmfs.sh + - _before_script + script: + - source ci-scripts/snapshot_cvmfs.sh + - _script after_script: - - docker rm -f cvmfs_${CI_JOB_ID} - - docker rm -f patatrack_container + - source ci-scripts/snapshot_cvmfs.sh + - _after_script only: variables: - $CI_COMMIT_BRANCH =~ /^qa.*$/ - $CI_COMMIT_TAG =~ /^v.*$/ - #changes: - # - cms/patatrack/* - # - cms/patatrack/cms-patatrack/* - # - cms/patatrack/cms-patatrack/*/* + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/utility_scripts/* artifacts: paths: - 
${CI_PROJECT_DIR}/traces @@ -142,9 +123,10 @@ job_build_standalone_image: <<: *template_build_image only: changes: - - cms/patatrack/* + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/ci-scripts/snapshot_cvmfs.sh - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/*/* + - cms/patatrack/cms-patatrack/utility_scripts/* job_test_standalone_image: stage: test @@ -153,20 +135,18 @@ job_test_standalone_image: image: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa script: - - export RESULTS_DIR=/scratch/results/CI-JOB-${CI_JOB_ID} - - export IMAGE_NAME=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-bmk:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - - docker pull ${IMAGE_NAME} - # Here comes the test run of the CMS Patatrack standalone container. Arguments are for the time being defaults/hardcoded FIXME - - docker run --rm --gpus '"device=0"' -v ${RESULTS_DIR}:/results ${IMAGE_NAME} -e 100 -t 8 -c 1 - - mv ${RESULTS_DIR} ${CI_PROJECT_DIR}/. 
+ - source ci-scripts/test_standalone_image.sh + - _script only: variables: - $CI_COMMIT_BRANCH =~ /^qa.*$/ - $CI_COMMIT_TAG =~ /^v.*$/ - #changes: - # - cms/patatrack/* - # - cms/patatrack/cms-patatrack/* - # - cms/patatrack/cms-patatrack/*/* + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - cms/patatrack/ci-scripts/test_standalone_image.sh + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/utility_scripts/* artifacts: paths: - ${CI_PROJECT_DIR}/${RESULTS_DIR} diff --git a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh new file mode 100644 index 0000000..3fe7b7f --- /dev/null +++ b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +function _before_script() { + docker pull ${CVMFS_IMAGE} + docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CVMFS_EXPORT_DIR}:${CVMFS_EXPORT_DIR} -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r ${CIENV_CVMFSREPO} -t /tmp/traces +} + +function _script() { + + sleep 1m # to give time to cvmfs to start + echo "CVMFS_EXPORT_DIR is $CVMFS_EXPORT_DIR" + # check cvmfs is running + docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe + # Here comes the dry run of the CMS Patatrack container. Arguments are for the time being defaults/hardcoded FIXME + docker pull gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa + docker run --name patatrack_container --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -e 100 -t 8 -c 1 + # run shrinkwrapper + docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} + # FIXME this is a dirty patch needed to make scipy running. cvmfs shrinkwrapper alone does not copy all files of that dir. 
To be investigated why + ls -lR ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 >${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content + rm -fr ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages + docker cp patatrack_container:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 + # remove duplicated data + rm -rf ${CVMFS_EXPORT_DIR}/cvmfs/.data + ls -R ${CVMFS_EXPORT_DIR} >${CI_PROJECT_DIR}/cvmfs_export_dir_content + +} + +function _after_script() { + docker rm -f cvmfs_${CI_JOB_ID} + docker rm -f patatrack_container +} + +export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} +export CVMFS_EXPORT_DIR=${CI_PROJECT_DIR}/cms/patatrack +export CIENV_CVMFSREPO=cms.cern.ch +export CVMFS_IMAGE=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} \ No newline at end of file diff --git a/cms/patatrack/ci-scripts/test_standalone_image.sh b/cms/patatrack/ci-scripts/test_standalone_image.sh new file mode 100644 index 0000000..398f72f --- /dev/null +++ b/cms/patatrack/ci-scripts/test_standalone_image.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +function _script() { + + docker pull ${IMAGE_NAME} + # Here comes the test run of the CMS Patatrack standalone container. Arguments are for the time being defaults/hardcoded FIXME + docker run --rm --gpus '"device=0"' -v ${RESULTS_DIR}:/results ${IMAGE_NAME} -e 100 -t 8 -c 1 + mv ${RESULTS_DIR} ${CI_PROJECT_DIR}/. 
+ +} + +export RESULTS_DIR=/scratch/results/CI-JOB-${CI_JOB_ID} +export IMAGE_NAME=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-bmk:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} -- GitLab From 9fb999acc54a2f38cc6a9fe913dc9ed207074b93 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 10:51:25 +0200 Subject: [PATCH 45/74] fix ci --- cms/cms-patatrack-ci.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index d86f085..7f5ad5a 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -1,11 +1,11 @@ --- stages: -- build_0 -- build_1 -- build_2 -- snapshot -- build_standalone -- test + - build_0 + - build_1 + - build_2 + - snapshot + - build_standalone + - test #- publish #- announce @@ -87,13 +87,13 @@ job_snapshot_cvmfs: image: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa before_script: - - source ci-scripts/snapshot_cvmfs.sh + - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh - _before_script - script: - - source ci-scripts/snapshot_cvmfs.sh + script: + - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh - _script after_script: - - source ci-scripts/snapshot_cvmfs.sh + - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh - _after_script only: variables: @@ -135,7 +135,7 @@ job_test_standalone_image: image: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa script: - - source ci-scripts/test_standalone_image.sh + - source cms/patatrack/ci-scripts/test_standalone_image.sh - _script only: variables: -- GitLab From 43be9c077170a44acebc5eb9e7c17bc255dee519 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 10:55:46 +0200 Subject: [PATCH 46/74] add script doc --- cms/patatrack/ci-scripts/snapshot_cvmfs.sh | 4 ++++ cms/patatrack/ci-scripts/test_standalone_image.sh | 6 ++++-- 2 files 
changed, 8 insertions(+), 2 deletions(-) diff --git a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh index 3fe7b7f..94de56b 100644 --- a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh +++ b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh @@ -1,5 +1,9 @@ #!/bin/bash +# script used in gitlab CI +# for job job_snapshot_cvmfs +# in file cms/cms-patatrack-ci.yml + function _before_script() { docker pull ${CVMFS_IMAGE} docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CVMFS_EXPORT_DIR}:${CVMFS_EXPORT_DIR} -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r ${CIENV_CVMFSREPO} -t /tmp/traces diff --git a/cms/patatrack/ci-scripts/test_standalone_image.sh b/cms/patatrack/ci-scripts/test_standalone_image.sh index 398f72f..217e361 100644 --- a/cms/patatrack/ci-scripts/test_standalone_image.sh +++ b/cms/patatrack/ci-scripts/test_standalone_image.sh @@ -1,12 +1,14 @@ #!/bin/bash -function _script() { +# script used in gitlab CI +# for job job_test_standalone_image +# in file cms/cms-patatrack-ci.yml +function _script() { docker pull ${IMAGE_NAME} # Here comes the test run of the CMS Patatrack standalone container. Arguments are for the time being defaults/hardcoded FIXME docker run --rm --gpus '"device=0"' -v ${RESULTS_DIR}:/results ${IMAGE_NAME} -e 100 -t 8 -c 1 mv ${RESULTS_DIR} ${CI_PROJECT_DIR}/. 
- } export RESULTS_DIR=/scratch/results/CI-JOB-${CI_JOB_ID} -- GitLab From e4f10392d36e35b084aac89589b1a3bf065a5b8c Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 11:17:12 +0200 Subject: [PATCH 47/74] fix CI --- cms/cms-patatrack-ci.yml | 2 +- cms/patatrack/ci-scripts/snapshot_cvmfs.sh | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 7f5ad5a..70e5f55 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -116,7 +116,7 @@ job_snapshot_cvmfs: job_build_standalone_image: stage: build_standalone before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/nvidia.Dockerfile.2 + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - export IMAGE_NAME=cms/cms-patatrack-nvidia-bmk - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} diff --git a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh index 94de56b..96ad3e0 100644 --- a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh +++ b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh @@ -10,7 +10,6 @@ function _before_script() { } function _script() { - sleep 1m # to give time to cvmfs to start echo "CVMFS_EXPORT_DIR is $CVMFS_EXPORT_DIR" # check cvmfs is running @@ -27,7 +26,6 @@ function _script() { # remove duplicated data rm -rf ${CVMFS_EXPORT_DIR}/cvmfs/.data ls -R ${CVMFS_EXPORT_DIR} >${CI_PROJECT_DIR}/cvmfs_export_dir_content - } function _after_script() { -- GitLab From 96f9253986beb041e25c037797e1be53ef18b7d3 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 11:57:02 +0200 Subject: [PATCH 48/74] checksum immediately after data copy --- cms/patatrack/ci-scripts/nvidia.Dockerfile.1 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 
b/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 index 6ee5e1a..b63ab1a 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 @@ -13,5 +13,8 @@ RUN mkdir -p /bmk/data RUN echo -e "\nExtracting Patatrack dataset..."; \ wget -q https://hep-benchmarks.web.cern.ch/hep-benchmarks/hep-workloads/data/cms/patatrack/opendata.tar -O /bmk/data/opendata.tar; \ - cd /bmk/data/; tar -xvf ./opendata.tar; rm ./opendata.tar + cd /bmk/data/; \ + md5sum opendata.tar | cut -f1 -d" " > /tmp/bmkdata_checksum; \ + tar -xvf ./opendata.tar; \ + rm ./opendata.tar -- GitLab From fadd9b243b21e169253724485c1ed98507227d2a Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 12:03:15 +0200 Subject: [PATCH 49/74] speedup checksums --- cms/patatrack/ci-scripts/nvidia.Dockerfile.2 | 25 ++++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 index 049a12d..9675274 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 @@ -17,23 +17,28 @@ FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack- COPY ./cvmfs /cvmfs RUN chmod -R 555 /cvmfs -RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar +# FIXME This checksum takes a lot of time. +# Commenting it. 
Can be substituted by a checksum using cvmfs utilities +#RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar +RUN touch /tmp/cvmfs_checksum -# This should normally contain always the same files and be cacheable (BMK-159) -#RUN tar -cf /tmp/bmkdata_checksum.tar /bmk/data && md5sum /tmp/bmkdata_checksum.tar | cut -f1 -d" " > /tmp/bmkdata_checksum && rm /tmp/bmkdata_checksum.tar #FIXME - -# This may also be cacheable in most cases except when /bmk contents change + This may also be cacheable in most cases except when /bmk contents change COPY ./cms-patatrack /bmk/./cms-patatrack -#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh # FIXME +# FIXME currently there is not common and the driver is in the patatrack folder +#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh -RUN if [ ! -d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi +# Checksum code in orchestrator dir. +# This MUST happen before linking the data dir +# otherwise will take a lot of time to tar +RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME +# The data dir has already a checksum in /tmp/bmkdata_checksum +# generated in nvidia.Dockerfile.1 +RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done -#RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME - -#RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json #FIXME +RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json #FIXME ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] -- GitLab From 7c2cd9084efba420652f39a45fe3cac1ab5252ed Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 20:38:42 +0200 Subject: [PATCH 50/74] remove old images to avoid wasting space --- cms/cms-patatrack-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 70e5f55..dacaa5e 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -28,6 +28,7 @@ stages: - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination 
$CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" - echo "DESTINATIONS $DESTINATIONS" + - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" # Prepare Kaniko configuration file - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json # Build and push the image from the Dockerfile at the root of the project. -- GitLab From addf33644d79d20cb003dfd72f450dd538c93acd Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sat, 4 Jul 2020 20:39:40 +0200 Subject: [PATCH 51/74] checking scipy libs (temporarly) --- cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index 6366a94..290eca4 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -4,7 +4,7 @@ #set -x # enable debug printouts -#set -e # immediate exit on error +set -e # immediate exit on error function myecho(){ @@ -56,9 +56,19 @@ function validateInputArguments(){ source $VO_CMS_SW_DIR/cmsset_default.sh [[ ! 
-e ${CMSSW_RELEASE} ]] && scram project CMSSW ${CMSSW_RELEASE} - pushd ${CMSSW_RELEASE}; eval `scramv1 runtime -sh`; popd - # Configure WL copy + cd ${CMSSW_RELEASE}/src; + eval `scramv1 runtime -sh`; + cd - + + #export LD_LIBRARY_PATH=/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/llvm/9.0.1-pfdnen/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 + #export SRT_LD_LIBRARY_PATH_SCRAMRT=/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/llvm/9.0.1-pfdnen/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib + # Configure WL copy + myecho "info about python" + python --version + which python + python -c 'import scipy; print(scipy.__path__)' + python -c 'from scipy import stats; import numpy as np; x=np.array([1,2,3]); 
y=np.array([1.1,2,2.9]); print(stats.linregress(x,y).slope)' return 0 } -- GitLab From 90069bd10e5bb26e25c6747056a54e45069b1f21 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 00:43:49 +0200 Subject: [PATCH 52/74] adding custom cvmfs path to be present --- cms/patatrack/traces/cms.cern.ch_spec_custom.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 cms/patatrack/traces/cms.cern.ch_spec_custom.txt diff --git a/cms/patatrack/traces/cms.cern.ch_spec_custom.txt b/cms/patatrack/traces/cms.cern.ch_spec_custom.txt new file mode 100644 index 0000000..e1badbc --- /dev/null +++ b/cms/patatrack/traces/cms.cern.ch_spec_custom.txt @@ -0,0 +1,3 @@ +/slc7_amd64_gcc820/external/py2-future/* +/slc7_amd64_gcc820/external/py2-numpy/* +/slc7_amd64_gcc820/external/py2-scipy/* \ No newline at end of file -- GitLab From ddae33f34fbae1576ba992ff16bd2dbb1d51ef0a Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 00:44:29 +0200 Subject: [PATCH 53/74] comply with new format for shrinkwrap script --- cms/patatrack/ci-scripts/snapshot_cvmfs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh index 96ad3e0..4cf6533 100644 --- a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh +++ b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh @@ -18,7 +18,7 @@ function _script() { docker pull gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa docker run --name patatrack_container --gpus '"device=0"' -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-2:qa -e 100 -t 8 -c 1 # run shrinkwrapper - docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} -j ${CVMFS_EXPORT_DIR} + docker exec cvmfs_${CI_JOB_ID} /root/shrinkwrap.sh -t /tmp/traces/ -e ${CVMFS_EXPORT_DIR} # FIXME this is 
a dirty patch needed to make scipy running. cvmfs shrinkwrapper alone does not copy all files of that dir. To be investigated why ls -lR ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7 >${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content rm -fr ${CVMFS_EXPORT_DIR}/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/py2-scipy/1.2.3-bcolbf/lib/python2.7/site-packages -- GitLab From f885afa85dd30644359a1a4c764e1179d9485595 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 00:51:08 +0200 Subject: [PATCH 54/74] fixing CMSSW runtime environment --- cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh index 290eca4..eed2c5c 100755 --- a/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh +++ b/cms/patatrack/cms-patatrack/cms-patatrack-bmk.sh @@ -60,14 +60,22 @@ function validateInputArguments(){ eval `scramv1 runtime -sh`; cd - - #export LD_LIBRARY_PATH=/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/llvm/9.0.1-pfdnen/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 - #export 
SRT_LD_LIBRARY_PATH_SCRAMRT=/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/llvm/9.0.1-pfdnen/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib + env | grep LD_LIBRARY_PATH + env | grep SRT_LD_LIBRARY_PATH_SCRAMRT + + # FIXME: so far, after having snapshotted cvmfs the LD_LIBRARY_PATH + # FIXME: does not contain all path needed as when cvmfs is bind mounted + # FIXME: therefore I'm forcing it to be as the correct one + export LD_LIBRARY_PATH=/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/llvm/9.0.1-pfdnen/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 + + export 
SRT_LD_LIBRARY_PATH_SCRAMRT=/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/bmk/cms-patatrack/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/biglib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/lib/slc7_amd64_gcc820:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_11_1_0_pre8_Patatrack/external/slc7_amd64_gcc820/lib:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/llvm/9.0.1-pfdnen/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib64:/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/gcc/8.2.0-bcolbf/lib # Configure WL copy - myecho "info about python" + myecho "info about python and tests" python --version which python python -c 'import scipy; print(scipy.__path__)' + python -c 'import numpy; print(numpy.__path__)' python -c 'from scipy import stats; import numpy as np; x=np.array([1,2,3]); y=np.array([1.1,2,2.9]); print(stats.linregress(x,y).slope)' return 0 } -- GitLab From 73cd360e0dbcdc082c573bd2c95cc720225dc03b Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 01:52:45 +0200 Subject: [PATCH 55/74] fix --- cms/patatrack/ci-scripts/nvidia.Dockerfile.2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 index 9675274..c14ed61 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 @@ -22,7 +22,7 @@ RUN chmod -R 555 /cvmfs #RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar RUN touch /tmp/cvmfs_checksum - This may also be cacheable in most cases except when /bmk contents change +# This may also be cacheable in most cases 
except when /bmk contents change COPY ./cms-patatrack /bmk/./cms-patatrack # FIXME currently there is not common and the driver is in the patatrack folder -- GitLab From 4f175fcc15da0b455e063936121491d6078aa45d Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 09:58:43 +0200 Subject: [PATCH 56/74] rmi needs auth --- cms/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index dacaa5e..7368deb 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -28,9 +28,9 @@ stages: - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" - echo "DESTINATIONS $DESTINATIONS" - - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" # Prepare Kaniko configuration file - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json + - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" # Build and push the image from the Dockerfile at the root of the project. # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables -- GitLab From cebd7b0a63ab6f1e76a956771a4926339e171758 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 10:01:26 +0200 Subject: [PATCH 57/74] amend last commit --- cms/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 7368deb..dacaa5e 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -28,9 +28,9 @@ stages: - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" - echo "DESTINATIONS $DESTINATIONS" + - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" # Prepare Kaniko configuration file - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json - - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" # Build and push the image from the Dockerfile at the root of the project. # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables -- GitLab From 525a8633b339401577b3e9f6ea21b946e373ba2e Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 12:09:41 +0200 Subject: [PATCH 58/74] including provenance folder --- cms/patatrack/cvmfs/{.provenance => .provenance/.keepme} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cms/patatrack/cvmfs/{.provenance => .provenance/.keepme} (100%) diff --git a/cms/patatrack/cvmfs/.provenance b/cms/patatrack/cvmfs/.provenance/.keepme similarity index 100% rename from cms/patatrack/cvmfs/.provenance rename to cms/patatrack/cvmfs/.provenance/.keepme -- GitLab From 3c685f4d98dfdb9d17b93d54a30db85ab9deca92 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 12:14:43 +0200 Subject: [PATCH 59/74] speed up the build procedure --- cms/patatrack/ci-scripts/build_2.sh | 41 +++++++++++++++ cms/patatrack/ci-scripts/nvidia.Dockerfile.2 | 49 +---------------- .../ci-scripts/nvidia.Dockerfile.2_old | 52 +++++++++++++++++++ 3 files changed, 95 insertions(+), 47 deletions(-) create mode 100755 cms/patatrack/ci-scripts/build_2.sh create mode 100644 cms/patatrack/ci-scripts/nvidia.Dockerfile.2_old diff --git a/cms/patatrack/ci-scripts/build_2.sh b/cms/patatrack/ci-scripts/build_2.sh new file mode 100755 index 0000000..2be45f6 --- /dev/null +++ b/cms/patatrack/ci-scripts/build_2.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -x +set -e + +# First move all folders in the right place +mv /stage/cvmfs /cvmfs +mv /stage/cms-patatrack /bmk/./cms-patatrack + +# Make only readable +chmod -R 555 /cvmfs + +# FIXME This checksum takes a lot of time. +# Commenting it. 
Can be substituted by a checksum using cvmfs utilities +#tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar +touch /tmp/cvmfs_checksum + +# Checksum code in orchestrator dir. +# This MUST happen before linking the data dir +# otherwise will take a lot of time to tar +tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " >/tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME + +# The data dir has already a checksum in /tmp/bmkdata_checksum +# generated in nvidia.Dockerfile.1 +if [ ! -d /bmk/./cms-patatrack/data ]; then + mkdir /bmk/./cms-patatrack/data +fi +for file in $(ls /bmk/data); do + ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file +done + +cvmfs_checksum=$(cat /tmp/cvmfs_checksum) +bmkdata_checksum=$(cat /tmp/bmkdata_checksum) +bmk_checksum=$(cat /tmp/bmk_checksum) +rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum +echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' >/bmk/./cms-patatrack/version.json #FIXME + +# Add user 'bmkuser' to benchmarks as a non-root user (BMK-166 and BMK-167) +# should not be needed, using cvmfs read only +#groupadd bmkuser +#useradd -g bmkuser --create-home --shell /bin/bash bmkuser diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 index c14ed61..f601f25 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 @@ -1,52 +1,7 @@ FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-1:qa -# ********* DOCKERFILE TEMPLATE start ********* -# ******* PLEASE DO NOT EDIT THIS FILE! ******* -# This is the common template for all HEP workloads (BMK-124 and BMK-159). 
-# Please add workload-specific instructions in Dockerfile.append. +COPY ./cvmfs ./cms-patatrack ./ci-scripts /stage -# Optionally allow disabling the cache only from this point onwards if using -# docker build -t your-image --build-arg CACHEBUST=$(date +%s) . -# See https://github.com/moby/moby/issues/1996#issuecomment-185872769 - -###ARG CACHEBUST=1 - -###RUN echo CACHEBUST=$CACHEBUST - -# This should normally contain always the same files and be cacheable (BMK-159) -COPY ./cvmfs /cvmfs -RUN chmod -R 555 /cvmfs - -# FIXME This checksum takes a lot of time. -# Commenting it. Can be substituted by a checksum using cvmfs utilities -#RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar -RUN touch /tmp/cvmfs_checksum - -# This may also be cacheable in most cases except when /bmk contents change -COPY ./cms-patatrack /bmk/./cms-patatrack - -# FIXME currently there is not common and the driver is in the patatrack folder -#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh - -# Checksum code in orchestrator dir. -# This MUST happen before linking the data dir -# otherwise will take a lot of time to tar -RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME - -# The data dir has already a checksum in /tmp/bmkdata_checksum -# generated in nvidia.Dockerfile.1 -RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi -RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done - -RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json #FIXME +RUN /stage/ci-scripts/build_2.sh ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] - -# This contains provenance data that can never be cached -COPY ./cvmfs/.provenance /cvmfs/.provenance - -# Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) -RUN groupadd bmkuser - -RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser -# ********* DOCKERFILE TEMPLATE end ********* diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2_old b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2_old new file mode 100644 index 0000000..c14ed61 --- /dev/null +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2_old @@ -0,0 +1,52 @@ +FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-1:qa + +# ********* DOCKERFILE TEMPLATE start ********* +# ******* PLEASE DO NOT EDIT THIS FILE! ******* +# This is the common template for all HEP workloads (BMK-124 and BMK-159). +# Please add workload-specific instructions in Dockerfile.append. + +# Optionally allow disabling the cache only from this point onwards if using +# docker build -t your-image --build-arg CACHEBUST=$(date +%s) . 
+# See https://github.com/moby/moby/issues/1996#issuecomment-185872769 + +###ARG CACHEBUST=1 + +###RUN echo CACHEBUST=$CACHEBUST + +# This should normally contain always the same files and be cacheable (BMK-159) +COPY ./cvmfs /cvmfs +RUN chmod -R 555 /cvmfs + +# FIXME This checksum takes a lot of time. +# Commenting it. Can be substituted by a checksum using cvmfs utilities +#RUN tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar +RUN touch /tmp/cvmfs_checksum + +# This may also be cacheable in most cases except when /bmk contents change +COPY ./cms-patatrack /bmk/./cms-patatrack + +# FIXME currently there is not common and the driver is in the patatrack folder +#COPY common/bmk-driver.sh /bmk/./cms-patatrack/bmk-driver.sh + +# Checksum code in orchestrator dir. +# This MUST happen before linking the data dir +# otherwise will take a lot of time to tar +RUN tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " > /tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME + +# The data dir has already a checksum in /tmp/bmkdata_checksum +# generated in nvidia.Dockerfile.1 +RUN if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data; fi +RUN for file in $(cd /bmk/data; ls); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file; done + +RUN cvmfs_checksum=`cat /tmp/cvmfs_checksum` && bmkdata_checksum=`cat /tmp/bmkdata_checksum` && bmk_checksum=`cat /tmp/bmk_checksum` && rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum && echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' > /bmk/./cms-patatrack/version.json #FIXME + +ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] + +# This contains provenance data that can never be cached +COPY ./cvmfs/.provenance /cvmfs/.provenance + +# Add user 'bmkuser' to run benchmarks as a non-root user (BMK-166 and BMK-167) +RUN groupadd bmkuser + +RUN useradd -g bmkuser --create-home --shell /bin/bash bmkuser +# ********* DOCKERFILE TEMPLATE end ********* -- GitLab From e6811394f7461b7143c58462e8dec1e2b2b98040 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 12:30:07 +0200 Subject: [PATCH 60/74] fix / at the end of COPY --- cms/patatrack/ci-scripts/nvidia.Dockerfile.2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 index f601f25..42dc017 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 @@ -1,6 +1,6 @@ FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-1:qa -COPY ./cvmfs ./cms-patatrack ./ci-scripts /stage +COPY ./cvmfs ./cms-patatrack ./ci-scripts /stage/ RUN /stage/ci-scripts/build_2.sh -- GitLab From e81ebcf624c9d6df169750328003f79d0ba04e48 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 12:36:18 +0200 Subject: [PATCH 61/74] 
fix dockerfile --- cms/patatrack/ci-scripts/nvidia.Dockerfile.2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 index 42dc017..156bebc 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 @@ -1,7 +1,7 @@ FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-1:qa -COPY ./cvmfs ./cms-patatrack ./ci-scripts /stage/ - +COPY . /stage/ +RUN ls -l /stage/* RUN /stage/ci-scripts/build_2.sh ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] -- GitLab From e9989c076b7cb5fb239692a81c7938350cfb3518 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 12:45:58 +0200 Subject: [PATCH 62/74] fix build)2 --- cms/patatrack/ci-scripts/nvidia.Dockerfile.2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 index 156bebc..ecc4be0 100644 --- a/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 @@ -1,7 +1,7 @@ FROM gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-gpu/cms/cms-patatrack-nvidia-1:qa COPY . 
/stage/ -RUN ls -l /stage/* +RUN ls -la /stage/* RUN /stage/ci-scripts/build_2.sh ENTRYPOINT ["/bmk/./cms-patatrack/cms-patatrack-bmk.sh"] -- GitLab From 0da2f329ad5ddc66cc7ecf24c3cefcdf60f1426d Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 12:49:45 +0200 Subject: [PATCH 63/74] fix build_2 --- cms/patatrack/ci-scripts/build_2.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cms/patatrack/ci-scripts/build_2.sh b/cms/patatrack/ci-scripts/build_2.sh index 2be45f6..8d3d64f 100755 --- a/cms/patatrack/ci-scripts/build_2.sh +++ b/cms/patatrack/ci-scripts/build_2.sh @@ -29,10 +29,9 @@ for file in $(ls /bmk/data); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file done -cvmfs_checksum=$(cat /tmp/cvmfs_checksum) -bmkdata_checksum=$(cat /tmp/bmkdata_checksum) -bmk_checksum=$(cat /tmp/bmk_checksum) -rm /tmp/cvmfs_checksum /tmp/bmkdata_checksum /tmp/bmk_checksum +cvmfs_checksum=$(cat /tmp/cvmfs_checksum || echo "NotAvailable") +bmkdata_checksum=$(cat /tmp/bmkdata_checksum || echo "NotAvailable") +bmk_checksum=$(cat /tmp/bmk_checksum || echo "NotAvailable") echo '{"version":"v1.3","description":"CMS RECO of ttbar events, based on CMSSW_10_2_9","cvmfs_checksum":"'$cvmfs_checksum'","bmkdata_checksum":"'$bmkdata_checksum'","bmk_checksum":"'$bmk_checksum'"}' >/bmk/./cms-patatrack/version.json #FIXME # Add user 'bmkuser' to benchmarks as a non-root user (BMK-166 and BMK-167) -- GitLab From 7428e0f5e17b0298a377eb2aafec74f4c7b23882 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:12:16 +0200 Subject: [PATCH 64/74] try to speedup build. 
Kaniko very long --- cms/cms-patatrack-ci.yml | 24 ++++++++++++++++++-- cms/patatrack/ci-scripts/nvidia.Dockerfile.0 | 1 + 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index dacaa5e..984c33d 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -12,7 +12,7 @@ stages: ########################## ## Templates ############# -.definition_build_image: &template_build_image +.definition_build_image_kaniko: &template_build_image_kaniko tags: - hep-workload-gpu-docker-builder image: # NB enable shared runners and do not specify a CI tag @@ -28,7 +28,6 @@ stages: - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" - echo "DESTINATIONS $DESTINATIONS" - - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" # Prepare Kaniko configuration file - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json # Build and push the image from the Dockerfile at the root of the project. 
@@ -36,6 +35,27 @@ stages: # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS +.definition_build_image_kaniko: &template_build_image_kaniko + tags: + - hep-workload-gpu-docker-builder + image: + name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko + entrypoint: ["/bin/bash"] + script: + - echo "current commit is ${CI_COMMIT_SHA:0:8}" + - echo "current branch is ${CI_COMMIT_BRANCH}" + - echo "current tag is ${CI_COMMIT_TAG}" + - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi + - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi + - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi + - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi + - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" + - echo $CI_BUILD_TOKEN | docker login -u gitlab-ci-token --password-stdin gitlab-registry.cern.ch + - docker build --no-cache -t $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} -f $DOCKERFILE $CONTEXT + - docker tag $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG + - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} + - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG + - docker rmi $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} ########################################################### # docker in docker image: to trigger other docker runs diff --git a/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 b/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 index cd42d72..6d66195 100644 --- 
a/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 +++ b/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 @@ -1,3 +1,4 @@ +# FIXME: need to build in gitlab this base image. Was done by hand FROM nvidia/cuda:10.1-devel-centos7 RUN yum install -y \ -- GitLab From 7004699bde32daef42b0cb9c44243e9704cbd0e1 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:13:58 +0200 Subject: [PATCH 65/74] try to speedup build. Kaniko very long --- cms/cms-patatrack-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 984c33d..90d917f 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -35,11 +35,11 @@ stages: # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS -.definition_build_image_kaniko: &template_build_image_kaniko +.definition_build_image: &template_build_image tags: - hep-workload-gpu-docker-builder image: - name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko + name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko. FIXME use a prod tag entrypoint: ["/bin/bash"] script: - echo "current commit is ${CI_COMMIT_SHA:0:8}" -- GitLab From 1737e686c8346c01892631c8b5993237fdf7561b Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:15:25 +0200 Subject: [PATCH 66/74] try to speedup build. 
Kaniko very long --- cms/cms-patatrack-ci.yml | 44 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 90d917f..b3afc25 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -12,28 +12,28 @@ stages: ########################## ## Templates ############# -.definition_build_image_kaniko: &template_build_image_kaniko - tags: - - hep-workload-gpu-docker-builder - image: # NB enable shared runners and do not specify a CI tag - name: gitlab-registry.cern.ch/ci-tools/docker-image-builder # CERN version of the Kaniko image - entrypoint: [""] - script: - - echo "current commit is ${CI_COMMIT_SHA:0:8}" - - echo "current branch is ${CI_COMMIT_BRANCH}" - - echo "current tag is ${CI_COMMIT_TAG}" - - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi - - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi - - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi - - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi - - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" - - echo "DESTINATIONS $DESTINATIONS" - # Prepare Kaniko configuration file - - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json - # Build and push the image from the Dockerfile at the root of the project. - # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME - # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables - - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS +# .definition_build_image_kaniko: &template_build_image_kaniko +# tags: +# - hep-workload-gpu-docker-builder +# image: # NB enable shared runners and do not specify a CI tag +# name: gitlab-registry.cern.ch/ci-tools/docker-image-builder # CERN version of the Kaniko image +# entrypoint: [""] +# script: +# - echo "current commit is ${CI_COMMIT_SHA:0:8}" +# - echo "current branch is ${CI_COMMIT_BRANCH}" +# - echo "current tag is ${CI_COMMIT_TAG}" +# - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi +# - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi +# - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi +# - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi +# - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" +# - echo "DESTINATIONS $DESTINATIONS" +# # Prepare Kaniko configuration file +# - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json +# # Build and push the image from the Dockerfile at the root of the project. +# # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME +# # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables +# - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS .definition_build_image: &template_build_image tags: -- GitLab From c31e093845c0408aa21718d2e8fa1c5fa9d3db15 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:16:25 +0200 Subject: [PATCH 67/74] try to speedup build. Kaniko very long --- cms/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index b3afc25..3ed65c5 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -40,7 +40,7 @@ stages: - hep-workload-gpu-docker-builder image: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko. FIXME use a prod tag - entrypoint: ["/bin/bash"] + #entrypoint: ["/bin/bash"] script: - echo "current commit is ${CI_COMMIT_SHA:0:8}" - echo "current branch is ${CI_COMMIT_BRANCH}" -- GitLab From 9f9cab9613d1552343443b408dcbb41feaeb7269 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:22:03 +0200 Subject: [PATCH 68/74] try to speedup build. Kaniko very long --- cms/cms-patatrack-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 3ed65c5..8703a6c 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -40,7 +40,7 @@ stages: - hep-workload-gpu-docker-builder image: name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko. 
FIXME use a prod tag - #entrypoint: ["/bin/bash"] + entrypoint: [""] script: - echo "current commit is ${CI_COMMIT_SHA:0:8}" - echo "current branch is ${CI_COMMIT_BRANCH}" -- GitLab From f77bf34c5e99f179fc60e52a4480276725618ad3 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:23:39 +0200 Subject: [PATCH 69/74] debug --- .gitlab-ci.yml | 231 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 168 insertions(+), 63 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dd0005d..8703a6c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,70 +1,175 @@ +--- stages: -- triggers -- test + - build_0 + - build_1 + - build_2 + - snapshot + - build_standalone + - test +#- publish +#- announce -##################################################### -### ATLAS KV (a test of cvmfs functionality) -##################################################### +########################## +## Templates ############# -job_test_kv: - stage: test - image: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa - tags: - - hep-workload-gpu-docker-builder - before_script: - - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} - - export CVMFS_IMAGE=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - - docker pull ${CVMFS_IMAGE} - - docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r atlas.cern.ch -t /tmp/traces - script: - - sleep 1m # to give time to cvmfs to start - - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} - - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe - - docker run --rm -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads/atlas-kv-bmk:ci1.2 -c 2 -t 1 -e 4 - - docker exec cvmfs_${CI_JOB_ID} cvmfs_talk -i atlas.cern.ch tracebuffer flush - - docker exec cvmfs_${CI_JOB_ID} python 
/usr/libexec/cvmfs/shrinkwrap/spec_builder.py --policy=exact /tmp/traces/cvmfs-atlas.cern.ch.trace.log /tmp/traces/cvmfs-atlas.cern.ch.spec - - docker cp cvmfs_${CI_JOB_ID}:/tmp/traces ${CI_PROJECT_DIR}/traces - after_script: - - docker rm -f cvmfs_${CI_JOB_ID} - only: - variables: - - $CI_COMMIT_BRANCH =~ /^qa.*$/ - - $CI_COMMIT_TAG =~ /^v.*$/ - artifacts: - paths: - - ${CI_PROJECT_DIR}/traces - expire_in: 1 week - when: always +# .definition_build_image_kaniko: &template_build_image_kaniko +# tags: +# - hep-workload-gpu-docker-builder +# image: # NB enable shared runners and do not specify a CI tag +# name: gitlab-registry.cern.ch/ci-tools/docker-image-builder # CERN version of the Kaniko image +# entrypoint: [""] +# script: +# - echo "current commit is ${CI_COMMIT_SHA:0:8}" +# - echo "current branch is ${CI_COMMIT_BRANCH}" +# - echo "current tag is ${CI_COMMIT_TAG}" +# - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi +# - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi +# - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi +# - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi +# - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" +# - echo "DESTINATIONS $DESTINATIONS" +# # Prepare Kaniko configuration file +# - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json +# # Build and push the image from the Dockerfile at the root of the project. +# # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME +# # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables +# - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS -##################################################### -### CMS PATATRACK -##################################################### +.definition_build_image: &template_build_image + tags: + - hep-workload-gpu-docker-builder + image: + name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko. FIXME use a prod tag + entrypoint: [""] + script: + - echo "current commit is ${CI_COMMIT_SHA:0:8}" + - echo "current branch is ${CI_COMMIT_BRANCH}" + - echo "current tag is ${CI_COMMIT_TAG}" + - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi + - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi + - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi + - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi + - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" + - echo $CI_BUILD_TOKEN | docker login -u gitlab-ci-token --password-stdin gitlab-registry.cern.ch + - docker build --no-cache -t $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} -f $DOCKERFILE $CONTEXT + - dockert tag $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG + - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} + - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG + - docker rmi $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} -patatrack: - stage: triggers - trigger: - include: - - local: cms/cms-patatrack-ci.yml - strategy: depend - only: - 
variables: - - $CI_COMMIT_BRANCH =~ /^qa.*$/ - - $CI_COMMIT_TAG =~ /^v.*$/ +########################################################### +# docker in docker image: to trigger other docker runs +########################################################### -##################################################### -### LHC Simple Track -##################################################### +job_build_image_step0: + stage: build_0 + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-0 + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.0 -simpletrack: - stage: triggers - trigger: - include: lhc/simpletrack/.simpletrack-ci.yml - strategy: depend - only: - variables: - - $CI_COMMIT_BRANCH =~ /^qa.*$/ - - $CI_COMMIT_TAG =~ /^v.*$/ - changes: - - lhc/simpletrack/Dockerfile.* - - lhc/simpletrack/lhc-simpletrack.* - - lhc/simpletrack/.simpletrack-ci.yml + +job_build_image_step1: + stage: build_1 + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-1 + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.0 + - cms/patatrack/ci-scripts/nvidia.Dockerfile.1 + +job_build_image_step2: + stage: build_2 + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-2 + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/cms-patatrack/* + - 
cms/patatrack/cms-patatrack/utility_scripts/* + +job_snapshot_cvmfs: + stage: snapshot + tags: + - hep-workload-gpu-docker-builder + image: + name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa + before_script: + - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - _before_script + script: + - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - _script + after_script: + - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - _after_script + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/utility_scripts/* + artifacts: + paths: + - ${CI_PROJECT_DIR}/traces + - ${CI_PROJECT_DIR}/cvmfs_export_dir_content + - ${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content + - ${CI_PROJECT_DIR}/cms/patatrack/cvmfs + expire_in: 1 week + when: always + +job_build_standalone_image: + stage: build_standalone + before_script: + - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 + - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack + - export IMAGE_NAME=cms/cms-patatrack-nvidia-bmk + - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + <<: *template_build_image + only: + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/utility_scripts/* + +job_test_standalone_image: + stage: test + tags: + - hep-workload-gpu-docker-builder + image: + name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa + script: + - source cms/patatrack/ci-scripts/test_standalone_image.sh + - _script + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + changes: + - cms/patatrack/ci-scripts/nvidia.Dockerfile.* + - cms/patatrack/ci-scripts/snapshot_cvmfs.sh + - 
cms/patatrack/ci-scripts/test_standalone_image.sh + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/utility_scripts/* + artifacts: + paths: + - ${CI_PROJECT_DIR}/${RESULTS_DIR} + expire_in: 1 week + when: always \ No newline at end of file -- GitLab From 5a719ee16631435f0b59c2b4bedd9ffe66746f51 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 15:24:58 +0200 Subject: [PATCH 70/74] debug --- cms/patatrack/ci-scripts/snapshot_cvmfs.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh index 4cf6533..0f9caf4 100644 --- a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh +++ b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh @@ -11,6 +11,7 @@ function _before_script() { function _script() { sleep 1m # to give time to cvmfs to start + echo "CVMFS_EXPORT_DIR is $CVMFS_EXPORT_DIR" # check cvmfs is running docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe -- GitLab From b009fc80fd18cdb34e370b1abd5426d7d39d601b Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Sun, 5 Jul 2020 17:30:18 +0200 Subject: [PATCH 71/74] fix CI --- .gitlab-ci.yml | 231 ++++++--------------- cms/cms-patatrack-ci.yml | 2 +- cms/patatrack/ci-scripts/build_2.sh | 8 +- cms/patatrack/ci-scripts/snapshot_cvmfs.sh | 1 - 4 files changed, 71 insertions(+), 171 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8703a6c..dd0005d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,175 +1,70 @@ ---- stages: - - build_0 - - build_1 - - build_2 - - snapshot - - build_standalone - - test -#- publish -#- announce +- triggers +- test -########################## -## Templates ############# +##################################################### +### ATLAS KV (a test of cvmfs functionality) +##################################################### -# .definition_build_image_kaniko: &template_build_image_kaniko -# tags: -# - 
hep-workload-gpu-docker-builder -# image: # NB enable shared runners and do not specify a CI tag -# name: gitlab-registry.cern.ch/ci-tools/docker-image-builder # CERN version of the Kaniko image -# entrypoint: [""] -# script: -# - echo "current commit is ${CI_COMMIT_SHA:0:8}" -# - echo "current branch is ${CI_COMMIT_BRANCH}" -# - echo "current tag is ${CI_COMMIT_TAG}" -# - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi -# - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi -# - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi -# - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi -# - export DESTINATIONS="--destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG --destination $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8}" -# - echo "DESTINATIONS $DESTINATIONS" -# # Prepare Kaniko configuration file -# - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json -# # Build and push the image from the Dockerfile at the root of the project. -# # To push to a specific docker tag, amend the --destination parameter, e.g. 
--destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF_NAME -# # See https://docs.gitlab.com/ee/ci/variables/predefined_variables.html#variables-reference for available variables -# - /kaniko/executor --context $CONTEXT --dockerfile $DOCKERFILE $DESTINATIONS +job_test_kv: + stage: test + image: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa + tags: + - hep-workload-gpu-docker-builder + before_script: + - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} + - export CVMFS_IMAGE=gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} + - docker pull ${CVMFS_IMAGE} + - docker run --name cvmfs_${CI_JOB_ID} -d --privileged -v ${CIENV_CVMFSVOLUME}:/cvmfs:shared ${CVMFS_IMAGE} -r atlas.cern.ch -t /tmp/traces + script: + - sleep 1m # to give time to cvmfs to start + - export CIENV_CVMFSVOLUME=/scratch/cvmfs_hep/CI-JOB-${CI_JOB_ID} + - docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe + - docker run --rm -v ${CIENV_CVMFSVOLUME}:/cvmfs gitlab-registry.cern.ch/hep-benchmarks/hep-workloads/atlas-kv-bmk:ci1.2 -c 2 -t 1 -e 4 + - docker exec cvmfs_${CI_JOB_ID} cvmfs_talk -i atlas.cern.ch tracebuffer flush + - docker exec cvmfs_${CI_JOB_ID} python /usr/libexec/cvmfs/shrinkwrap/spec_builder.py --policy=exact /tmp/traces/cvmfs-atlas.cern.ch.trace.log /tmp/traces/cvmfs-atlas.cern.ch.spec + - docker cp cvmfs_${CI_JOB_ID}:/tmp/traces ${CI_PROJECT_DIR}/traces + after_script: + - docker rm -f cvmfs_${CI_JOB_ID} + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + artifacts: + paths: + - ${CI_PROJECT_DIR}/traces + expire_in: 1 week + when: always -.definition_build_image: &template_build_image - tags: - - hep-workload-gpu-docker-builder - image: - name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa # Use instead of kaniko. 
FIXME use a prod tag - entrypoint: [""] - script: - - echo "current commit is ${CI_COMMIT_SHA:0:8}" - - echo "current branch is ${CI_COMMIT_BRANCH}" - - echo "current tag is ${CI_COMMIT_TAG}" - - if [[ -z $DOCKERFILE ]]; then echo "ERROR variable DOCKERFILE is not defined "; exit 1; fi - - if [[ -z $CONTEXT ]]; then echo "ERROR variable CONTEXT is not defined "; exit 1; fi - - if [[ -z $IMAGE_NAME ]]; then echo "ERROR variable IMAGE_NAME is not defined "; exit 1; fi - - if [[ -z $IMAGE_TAG ]]; then echo "ERROR variable IMAGE_TAG is not defined "; exit 1; fi - - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" - - echo $CI_BUILD_TOKEN | docker login -u gitlab-ci-token --password-stdin gitlab-registry.cern.ch - - docker build --no-cache -t $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} -f $DOCKERFILE $CONTEXT - - dockert tag $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG - - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} - - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG - - docker rmi $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} +##################################################### +### CMS PATATRACK +##################################################### -########################################################### -# docker in docker image: to trigger other docker runs -########################################################### +patatrack: + stage: triggers + trigger: + include: + - local: cms/cms-patatrack-ci.yml + strategy: depend + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ -job_build_image_step0: - stage: build_0 - before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.0 - - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - - export 
IMAGE_NAME=cms/cms-patatrack-nvidia-0 - - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - <<: *template_build_image - only: - changes: - - cms/patatrack/ci-scripts/nvidia.Dockerfile.0 +##################################################### +### LHC Simple Track +##################################################### - -job_build_image_step1: - stage: build_1 - before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.1 - - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - - export IMAGE_NAME=cms/cms-patatrack-nvidia-1 - - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - <<: *template_build_image - only: - changes: - - cms/patatrack/ci-scripts/nvidia.Dockerfile.0 - - cms/patatrack/ci-scripts/nvidia.Dockerfile.1 - -job_build_image_step2: - stage: build_2 - before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 - - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - - export IMAGE_NAME=cms/cms-patatrack-nvidia-2 - - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - <<: *template_build_image - only: - changes: - - cms/patatrack/ci-scripts/nvidia.Dockerfile.* - - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/utility_scripts/* - -job_snapshot_cvmfs: - stage: snapshot - tags: - - hep-workload-gpu-docker-builder - image: - name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa - before_script: - - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh - - _before_script - script: - - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh - - _script - after_script: - - source cms/patatrack/ci-scripts/snapshot_cvmfs.sh - - _after_script - only: - variables: - - $CI_COMMIT_BRANCH =~ /^qa.*$/ - - $CI_COMMIT_TAG =~ /^v.*$/ - changes: - - cms/patatrack/ci-scripts/nvidia.Dockerfile.* - - cms/patatrack/ci-scripts/snapshot_cvmfs.sh - - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/utility_scripts/* - artifacts: - paths: - - ${CI_PROJECT_DIR}/traces - - 
${CI_PROJECT_DIR}/cvmfs_export_dir_content - - ${CI_PROJECT_DIR}/cvmfs_export_py2-scipy_content - - ${CI_PROJECT_DIR}/cms/patatrack/cvmfs - expire_in: 1 week - when: always - -job_build_standalone_image: - stage: build_standalone - before_script: - - export DOCKERFILE=$CI_PROJECT_DIR/cms/patatrack/ci-scripts/nvidia.Dockerfile.2 - - export CONTEXT=$CI_PROJECT_DIR/cms/patatrack - - export IMAGE_NAME=cms/cms-patatrack-nvidia-bmk - - export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} - <<: *template_build_image - only: - changes: - - cms/patatrack/ci-scripts/nvidia.Dockerfile.* - - cms/patatrack/ci-scripts/snapshot_cvmfs.sh - - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/utility_scripts/* - -job_test_standalone_image: - stage: test - tags: - - hep-workload-gpu-docker-builder - image: - name: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa - script: - - source cms/patatrack/ci-scripts/test_standalone_image.sh - - _script - only: - variables: - - $CI_COMMIT_BRANCH =~ /^qa.*$/ - - $CI_COMMIT_TAG =~ /^v.*$/ - changes: - - cms/patatrack/ci-scripts/nvidia.Dockerfile.* - - cms/patatrack/ci-scripts/snapshot_cvmfs.sh - - cms/patatrack/ci-scripts/test_standalone_image.sh - - cms/patatrack/cms-patatrack/* - - cms/patatrack/cms-patatrack/utility_scripts/* - artifacts: - paths: - - ${CI_PROJECT_DIR}/${RESULTS_DIR} - expire_in: 1 week - when: always \ No newline at end of file +simpletrack: + stage: triggers + trigger: + include: lhc/simpletrack/.simpletrack-ci.yml + strategy: depend + only: + variables: + - $CI_COMMIT_BRANCH =~ /^qa.*$/ + - $CI_COMMIT_TAG =~ /^v.*$/ + changes: + - lhc/simpletrack/Dockerfile.* + - lhc/simpletrack/lhc-simpletrack.* + - lhc/simpletrack/.simpletrack-ci.yml diff --git a/cms/cms-patatrack-ci.yml b/cms/cms-patatrack-ci.yml index 8703a6c..ab394ab 100644 --- a/cms/cms-patatrack-ci.yml +++ b/cms/cms-patatrack-ci.yml @@ -52,7 +52,7 @@ stages: - docker rmi -f $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG || echo "image 
$CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG does not exist" - echo $CI_BUILD_TOKEN | docker login -u gitlab-ci-token --password-stdin gitlab-registry.cern.ch - docker build --no-cache -t $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} -f $DOCKERFILE $CONTEXT - - dockert tag $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG + - docker tag $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} - docker push $CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG - docker rmi $CI_REGISTRY_IMAGE/$IMAGE_NAME:ci-${CI_COMMIT_BRANCH}-${CI_COMMIT_SHA:0:8} diff --git a/cms/patatrack/ci-scripts/build_2.sh b/cms/patatrack/ci-scripts/build_2.sh index 8d3d64f..8a78e56 100755 --- a/cms/patatrack/ci-scripts/build_2.sh +++ b/cms/patatrack/ci-scripts/build_2.sh @@ -4,24 +4,29 @@ set -x set -e # First move all folders in the right place +date mv /stage/cvmfs /cvmfs + +date mv /stage/cms-patatrack /bmk/./cms-patatrack # Make only readable +date chmod -R 555 /cvmfs # FIXME This checksum takes a lot of time. # Commenting it. Can be substituted by a checksum using cvmfs utilities #tar -cf /tmp/cvmfs_checksum.tar /cvmfs && md5sum /tmp/cvmfs_checksum.tar | cut -f1 -d" " > /tmp/cvmfs_checksum && rm /tmp/cvmfs_checksum.tar -touch /tmp/cvmfs_checksum # Checksum code in orchestrator dir. # This MUST happen before linking the data dir # otherwise will take a lot of time to tar +date tar -cf /tmp/bmk_checksum.tar /bmk && md5sum /tmp/bmk_checksum.tar | cut -f1 -d" " >/tmp/bmk_checksum && rm /tmp/bmk_checksum.tar #FIXME # The data dir has already a checksum in /tmp/bmkdata_checksum # generated in nvidia.Dockerfile.1 +date if [ ! 
-d /bmk/./cms-patatrack/data ]; then mkdir /bmk/./cms-patatrack/data fi @@ -29,6 +34,7 @@ for file in $(ls /bmk/data); do ln -sf /bmk/data/$file /bmk/./cms-patatrack/data/$file done +date cvmfs_checksum=$(cat /tmp/cvmfs_checksum || echo "NotAvailable") bmkdata_checksum=$(cat /tmp/bmkdata_checksum || echo "NotAvailable") bmk_checksum=$(cat /tmp/bmk_checksum || echo "NotAvailable") diff --git a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh index 0f9caf4..4cf6533 100644 --- a/cms/patatrack/ci-scripts/snapshot_cvmfs.sh +++ b/cms/patatrack/ci-scripts/snapshot_cvmfs.sh @@ -11,7 +11,6 @@ function _before_script() { function _script() { sleep 1m # to give time to cvmfs to start - echo "CVMFS_EXPORT_DIR is $CVMFS_EXPORT_DIR" # check cvmfs is running docker exec cvmfs_${CI_JOB_ID} cvmfs_config probe -- GitLab From 85d96d0679377ddac77b0984224020c4564606f5 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Mon, 6 Jul 2020 10:23:10 +0200 Subject: [PATCH 72/74] add documentation --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d22104f..1309b73 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,12 @@ # hep-workloads-GPU -Build standalone reference HEP workloads for benchmarking purposes on GPUs \ No newline at end of file +Build standalone reference HEP workloads for benchmarking purposes on GPUs + +The documentation of the individual workloads can be found in the following links + +## Notebooks + +| Internal Doc | Note | External Link | +| :--- | :--- | :--- | +| [Simple Track](https://gitlab.cern.ch/hep-benchmarks/hep-workloads-gpu/-/blob/master/lhc/simpletrack/README.md) | Simulation of LHC turning particles | tbd | +| [CMS Patatrack](https://gitlab.cern.ch/hep-benchmarks/private-tools/tree/master/Analysis/Notebooks/templates) | CMS HLT Reconstruction code | [CMS patatrack github project](https://github.com/cms-patatrack) | -- GitLab 
From f5f484fec349dcfac2c682fc6790aef08c6abd36 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Mon, 6 Jul 2020 10:24:35 +0200 Subject: [PATCH 73/74] patatrack CI will only run in case of changes --- .gitlab-ci.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dd0005d..f219562 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,7 @@ stages: ### ATLAS KV (a test of cvmfs functionality) ##################################################### -job_test_kv: +.job_test_kv: stage: test image: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/dind:qa tags: @@ -50,7 +50,12 @@ patatrack: variables: - $CI_COMMIT_BRANCH =~ /^qa.*$/ - $CI_COMMIT_TAG =~ /^v.*$/ - + changes: + - cms/patatrack/* + - cms/patatrack/ci-scripts/* + - cms/patatrack/cms-patatrack/* + - cms/patatrack/cms-patatrack/utility_scripts/* + ##################################################### ### LHC Simple Track ##################################################### -- GitLab From ee24974271c8acb474aa1578fd70531677afbf92 Mon Sep 17 00:00:00 2001 From: Domenico Giordano <domenico.giordano@cern.ch> Date: Mon, 6 Jul 2020 10:27:36 +0200 Subject: [PATCH 74/74] fix link doc --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1309b73..47824ca 100644 --- a/README.md +++ b/README.md @@ -9,4 +9,4 @@ The documentation of the individual workloads can be found in the following link | Internal Doc | Note | External Link | | :--- | :--- | :--- | | [Simple Track](https://gitlab.cern.ch/hep-benchmarks/hep-workloads-gpu/-/blob/master/lhc/simpletrack/README.md) | Simulation of LHC turning particles | tbd | -| [CMS Patatrack](https://gitlab.cern.ch/hep-benchmarks/private-tools/tree/master/Analysis/Notebooks/templates) | CMS HLT Reconstruction code | [CMS patatrack github project](https://github.com/cms-patatrack) | +| [CMS 
Patatrack](https://gitlab.cern.ch/hep-benchmarks/hep-workloads-gpu/-/blob/master/cms/README.md) | CMS HLT Reconstruction code | [CMS patatrack github project](https://github.com/cms-patatrack) | -- GitLab