Moore_bandwidth_test.sh

#!/bin/bash
###############################################################################
# (c) Copyright 2023-2024 CERN for the benefit of the LHCb Collaboration      #
#                                                                             #
# This software is distributed under the terms of the GNU General Public      #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING".   #
#                                                                             #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization  #
# or submit itself to any jurisdiction.                                       #
###############################################################################

# Help text printed for -h/--help and when the script is called without arguments.
# The here-doc delimiter is unquoted, so $BASEDIR in the text is expanded at the
# moment this assignment runs, i.e. before the environment validation further down.
usage=$(cat <<-EOF
Wrapper script around jobs to run Moore (Hlt2, Sprucing) with all lines over a large sample,
and then analyse the results to calculate rates, bandwidths and overlaps etc.

Writes out to $BASEDIR/Output/ (default for BASEDIR is tmp)

Usage: Moore/run /path/to/Moore_bandwidth_test.sh [options] 2>&1 | tee <path-to-output.log>
       to collect all output as a log file.

       Expected to be called by e.g. Moore_hlt2_bandwidth.sh for the periodic LHCbPR tests.

        Requires the following environment variables to be set, see e.g. Moore_hlt2_bandwidth.sh for an example:
            {HLT1, HLT2, SPRUCE}_EVTMAX: maximum number of events to process in the selection processing job
            BASEDIR: location for the output directories
            DOWNLOAD_INPUT_LOCALLY: flag to download the input files for the selection processing jobs.
            BUILD_PAGES_LOCALLY: flag to make the webpages' links navigable locally
            HLT2_THREADS: flag to determine hlt2 threads used __if LBN_BUILD_JOBS is not set__.
            HLT1_INPUT_CONFIG: Override the default hlt1 input config, '' evaluates to default.
            {HLT1, HLT2, SPRUCE}_INPUT_CONFIG: Override the default input config in the selection processing job, '' evaluates to default.

--process: "hlt1", "hlt2" or "spruce".
--input-data: "nominal", "SMOG2_pp_pAr", "hlt2-output-locally" or "hlt2-output-from-eos".
    "hlt2-output-from-eos" and "hlt2-output-locally" are not currently available for process == hlt1 or hlt2.
--stream-config: name of streaming configuration to use in the job e.g. 'production' for hlt2, or 'full', 'turbo' or 'turcal' for spruce.
-h|--help: print this message and exit.

EOF
)

# Validate one "<flag> <value>" pair from the command line and export the value.
#
# Arguments:
#   $1 - name of the environment variable to export
#   $2 - the command-line flag being parsed (only used in the error message)
#   $3 - the value supplied for that flag
# Outputs:
#   an ERROR line on stdout when the pair is rejected
# Returns:
#   0 after exporting $1=$3; 1 if the argument count is not exactly three or
#   the value itself looks like another option
function parse_value_and_export() {

    if [ $# -ne 3 ]; then
        echo "ERROR: Must provide the argument as $0 <value>"
        return 1
    fi

    # A value that *starts* with "--" means the real value was omitted and the
    # next flag was picked up instead. Values merely containing "--" are fine.
    if [[ "$3" == --* ]]; then
        echo 'ERROR: Invalid arguments "'"$2 $3"'"'
        return 1
    fi

    # Quoted so that values with whitespace or glob characters are exported intact
    export "$1=$3"
}

# Running tally of failures; ends up as the exit status of the script.
ERR_CODE=0
# Add the exit status of the command that ran immediately before this call to
# ERR_CODE. Call it directly after the command of interest, before anything
# else resets $?.
# The tally saturates at 255: exit statuses wrap modulo 256, so an unbounded sum
# (e.g. 255 + 1) could otherwise turn a failed run into "exit 0".
function STORE_ERR_CODE () {
    local last_status=$?
    ERR_CODE=$(( last_status + ERR_CODE ))
    if (( ERR_CODE > 255 )); then
        ERR_CODE=255
    fi
}

# Called without any arguments: show the help text and leave successfully
[[ $# -gt 0 ]] || {
    echo "$usage"
    exit 0
}

# Strict on environment kwargs for robust configurability.
#
# Check that the environment variable whose NAME is passed as $1 has been set.
# Arguments:
#   $1 - name of the variable to check (not its value)
# Outputs:
#   an INFO line showing the value ("null" if empty) or an ERROR line, on stdout
# Returns:
#   0 if the variable is set (possibly to the empty string), 1 otherwise
function validate_args () {
    if [ $# -ne 1 ]; then
        echo "ERROR: Usage: $0 <variable>"
        return 1
    fi

    # Allow $1 to be null, but never unset.
    # ${!1+set} expands to "set" only when the variable named by $1 exists;
    # testing ${1} itself would only test the (always non-empty) name.
    if [ -n "${!1+set}" ]; then
        echo "INFO: $1 is set to ${!1:-null}"
        return 0
    fi

    echo "ERROR: $1 unset"
    return 1
}
# Run validate_args over every environment variable the script relies on. All of
# them are checked before bailing out, so a single run reports every problem.
required_env_vars=(
    HLT1_EVTMAX HLT2_EVTMAX SPRUCE_EVTMAX
    BASEDIR
    DOWNLOAD_INPUT_LOCALLY BUILD_PAGES_LOCALLY
    HLT2_THREADS
    HLT1_INPUT_CONFIG HLT2_INPUT_CONFIG SPRUCE_INPUT_CONFIG
)
for env_var in "${required_env_vars[@]}"; do
    validate_args $env_var
    STORE_ERR_CODE
done
if (( ERR_CODE > 0 )); then
    echo "FATAL: Errors in validating environment variables"
    exit $ERR_CODE
fi

# Exported for the jobs started below so that they can cope with large events;
# background in https://gitlab.cern.ch/lhcb-datapkg/PRConfig/-/issues/35#note_8723098
IOALG_BUFFER_EVENTS=300
export IOALG_BUFFER_EVENTS

# parse arguments
# Every recognised option takes exactly one value. parse_value_and_export checks
# the pair and exports it; if it rejects the pair the script stops here instead
# of shifting on and mis-reading the remaining arguments.
# $1 and $2 are deliberately left unquoted in those calls: a missing or empty
# value then changes the argument count and is rejected by the callee.
while [[ $# -gt 0 ]]; do
    key="$1"
    case $key in
        -h|--help)
            echo "$usage"
            exit 0
            ;;
        --process)
            parse_value_and_export PROCESS $1 $2 || exit 1
            shift # parse argument
            shift # parse value
            ;;
        --input-data)
            parse_value_and_export INPUTDATA $1 $2 || exit 1
            shift # parse argument
            shift # parse value
            ;;
        --stream-config)
            parse_value_and_export STREAM_CONFIG $1 $2 || exit 1
            # n_vals=${#STREAM_CONFIGS[@]}
            shift # parse argument
            shift # parse value
            # for i in $(seq $n_vals); do shift; done
            ;;
        *)
            echo "ERROR: Unknown argument \"$1\""
            exit 1
            ;;
    esac
done

# check for empty configuration
if [ -z "$PROCESS" ]; then
    echo "ERROR: You must specify the process via the --process argument"
    exit 1
fi

if [ -z "$INPUTDATA" ]; then
    echo "ERROR: You must specify the input-data via the --input-data argument"
    exit 1
fi

# The stream configuration is interpolated into the job arguments and into the
# name of the options file for every process, so an empty value cannot work.
if [ -z "$STREAM_CONFIG" ]; then
    echo "ERROR: You must specify the streaming configuration via the --stream-config argument"
    exit 1
fi

# Job settings that depend on the selection stage under test. Each branch fixes
# the thread count, the directory holding the options files, the event-size limit
# later passed via -a, the process name, the input/output file types, the event
# budget and any extra options.
# TEST_PATH_PREFIX and the spruce EXTRA_OPTS are single-quoted on purpose: the
# variable references inside them stay unexpanded in the stored string.
case "$PROCESS" in
    hlt1)
        if [[ "$INPUTDATA" != "nominal" ]]; then
            echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
            exit 1
        fi
        GAUDIRUN_INPUT_PROCESS="Hlt1"
        TEST_PATH_PREFIX='$HLT1CONFROOT/tests/options/bandwidth/'
        INPUT_TYPE="MDF"
        OUTPUT_TYPE="MDF"
        EVTMAX=$HLT1_EVTMAX
        EVENT_SIZE_UPPER_LIMIT=200
        # Although the inputs are mdfs, AllenViaMoore doesn't like to be
        # multi-threaded: one thread, and "-e 1" for the same reason.
        MOORE_THREADS=1
        EXTRA_OPTS="-e 1"
        ;;
    hlt2)
        GAUDIRUN_INPUT_PROCESS="Hlt2"
        TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
        INPUT_TYPE="MDF"
        OUTPUT_TYPE="MDF"
        EVTMAX=$HLT2_EVTMAX
        EVENT_SIZE_UPPER_LIMIT=200
        # LBN_BUILD_JOBS wins when it is set and non-empty, HLT2_THREADS otherwise
        MOORE_THREADS=${LBN_BUILD_JOBS:-$HLT2_THREADS}
        EXTRA_OPTS=''
        ;;
    spruce)
        GAUDIRUN_INPUT_PROCESS="Spruce"
        TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
        OUTPUT_TYPE="ROOT"
        EVTMAX=$SPRUCE_EVTMAX
        EVENT_SIZE_UPPER_LIMIT=300
        MOORE_THREADS=1 # Cant write to .dst multi-threaded
        case "$INPUTDATA" in
            nominal)
                # Requires #EvtSlots==1 due to writing dsts, must be single threaded.
                # Compression as per sprucing production.
                INPUT_TYPE="RAW"
                EXTRA_OPTS='-e 1 ${APPCONFIGOPTS}/Persistency/Compression-LZMA-4.py'
                ;;
            hlt2-output-locally | hlt2-output-from-eos)
                # "hlt2-output-locally": the locally-run full-stream output from a
                # "process=hlt2, input-data=nominal" test.
                # "hlt2-output-from-eos": the uploaded full-stream output of such a test;
                # those files are overwritten during "lhcb-master" builds, i.e. ~daily.
                # Both inputs take the same settings.
                INPUT_TYPE="MDF"
                EXTRA_OPTS='-e 1 -um ${APPCONFIGOPTS}/Persistency/Compression-LZMA-4.py'
                ;;
            *)
                echo "ERROR: --input-data must be \"nominal\", \"hlt2-output-locally\", \"hlt2-output-from-eos\" for process \"$PROCESS\""
                exit 1
                ;;
        esac
        ;;
    *)
        echo "Unrecognised process \"$PROCESS\". It must be \"hlt1\" or \"hlt2\" or \"spruce\"."
        exit 1
        ;;
esac

### Now run the tests
# 0. Pre-Job initialising
# Resolve CONFIG_PATH, the input config handed to the jobs below: the matching
# {HLT1,HLT2,SPRUCE}_INPUT_CONFIG override when it is non-empty, otherwise a
# per-process default. For spruce, additionally fetch the inputs from EOS if
# requested and generate the TISTOS option file that is appended to EXTRA_OPTS.
if [ "$PROCESS" = "hlt1" ]; then
    CONFIG_FILE="hlt1_bandwidth_input.yaml"
    DEFAULT_CONFIG_PATH="${TEST_PATH_PREFIX}${CONFIG_FILE}"
    CONFIG_PATH=${HLT1_INPUT_CONFIG:-$DEFAULT_CONFIG_PATH}

elif [ "$PROCESS" = "hlt2" ]; then
    case $INPUTDATA in
        nominal)
            CONFIG_FILE="hlt2_bandwidth_input_2024.yaml"
            ;;
        SMOG2_pp_pAr)
            CONFIG_FILE="hlt2_bandwidth_input_2024_withSMOG2.yaml"
            ;;
        *)
            echo "ERROR: --input-data must be \"nominal\", \"SMOG2_pp_pAr\" for process \"$PROCESS\""
            exit 1
            ;;
    esac
    DEFAULT_CONFIG_PATH="${TEST_PATH_PREFIX}${CONFIG_FILE}"
    CONFIG_PATH=${HLT2_INPUT_CONFIG:-$DEFAULT_CONFIG_PATH}

else ## Spruce
    LATEST_CONFIG_FILE="spruce_bandwidth_latest_input__${STREAM_CONFIG}.yaml"
    # Kept as a one-element array; ${LATEST_MANIFEST_FILE} below reads its first word
    LATEST_MANIFEST_FILE=($(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('hlt2', 'production'); print( hlpr.tck(
        on_eos=False, full_path=False) )"))
    STATIC_CONFIG_FILE="spruce_bandwidth_input.yaml"

    # Now can set the config file path
    if [ "$INPUTDATA" = "hlt2-output-from-eos" ]; then
        echo "Downloading ${LATEST_CONFIG_FILE} to use as input config."
        DEFAULT_CONFIG_PATH="${BASEDIR}/${LATEST_CONFIG_FILE}"
        PRWWW_PREFIX=($(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; print(FileNameHelper.current_hlt2_output_dir)"))
        xrdcp -f "${PRWWW_PREFIX}/${LATEST_CONFIG_FILE}" "$DEFAULT_CONFIG_PATH"
        # A failed config download must be recorded too, not only the manifest one
        STORE_ERR_CODE
        echo "Downloading input_manifest_file ${LATEST_MANIFEST_FILE} to ${BASEDIR}/MDF/."
        xrdcp -f "${PRWWW_PREFIX}/${LATEST_MANIFEST_FILE}" "$BASEDIR/MDF/${LATEST_MANIFEST_FILE}"
        STORE_ERR_CODE
    elif [ "$INPUTDATA" = "hlt2-output-locally" ]; then
        echo "Using ${LATEST_CONFIG_FILE} generated in previous job as input config."
        DEFAULT_CONFIG_PATH="$BASEDIR/${LATEST_CONFIG_FILE}"
    else
        DEFAULT_CONFIG_PATH="${TEST_PATH_PREFIX}${STATIC_CONFIG_FILE}"
    fi
    CONFIG_PATH=${SPRUCE_INPUT_CONFIG:-$DEFAULT_CONFIG_PATH}

    echo "Generating TISTOS option file"
    time python -m MooreTests.generate_tistos_option_file -c "$CONFIG_PATH" --stream-config "$STREAM_CONFIG"
    STORE_ERR_CODE
    TISTOS_OPTION_FILE_LOCATION=($(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}', '${STREAM_CONFIG}'); print( hlpr.tistos_option_file() ) "))
    EXTRA_OPTS+=" ${TISTOS_OPTION_FILE_LOCATION}"
fi

# 1. Run Moore.
# -d downloads the input files locally for speed-up running Moore. Not helpful unless that download is fast for you (e.g. you're at CERN)
# DOWNLOAD_INPUT_LOCALLY may legitimately be empty, so it is quoted in the test;
# unquoted, an empty value makes "[" fail with "unary operator expected".
if [ "$DOWNLOAD_INPUT_LOCALLY" = true ]; then
    DOWNLOAD_INPUT_LOCALLY_FLAG='-d'
else
    DOWNLOAD_INPUT_LOCALLY_FLAG=''
fi
echo "Running trigger to obtain MDF/DST files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_PATH}"
# DOWNLOAD_INPUT_LOCALLY_FLAG and EXTRA_OPTS are intentionally unquoted: an empty flag
# must vanish from the command line and EXTRA_OPTS must split into separate arguments.
time python -m MooreTests.run_bandwidth_test_jobs ${DOWNLOAD_INPUT_LOCALLY_FLAG} -c=$CONFIG_PATH -sc=$STREAM_CONFIG -n=$EVTMAX -p=$PROCESS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py"
STORE_ERR_CODE

# 2. Count the events that were run over - this is the denominator of the rates
time python -m MooreTests.read_event_numbers count_input_events \
    -p $PROCESS -sc $STREAM_CONFIG -n $EVTMAX --file-type $INPUT_TYPE
STORE_ERR_CODE

echo "Doing analysis for the ${STREAM_CONFIG} streaming configuration..."

# 3. Read the stream names from the stream config JSON; the later steps loop over them.
# jq prints the top-level keys as shell-quoted words, which declare reads back
# into the STREAMS array.
STREAM_CONFIG_JSON_PATH=($(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}', '${STREAM_CONFIG}'); print(hlpr.stream_config_json_path())"))
STREAM_STR=$(jq -r 'keys | @sh' ${STREAM_CONFIG_JSON_PATH})
declare -a STREAMS="($STREAM_STR)"
echo "Found ${STREAM_CONFIG} streams: ${STREAMS[*]}"

# 4. Tabulate line descriptives (persist reco, extra output) - not done for hlt1
if [[ $PROCESS != "hlt1" ]]; then
    echo 'Obtaining line descriptives'
    time python -m MooreTests.line_descriptives \
        -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]}
    STORE_ERR_CODE
else
    echo 'Skipping line descriptives as $PROCESS = "hlt1"'
fi

# 5. Extract filesizes of mdf/dst outputs + compress Hlt2 output.
echo 'Extract filesizes of mdf/dst outputs'
time python $PRCONFIGROOT/python/MooreTests/extract_filesizes.py \
    -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]}
STORE_ERR_CODE

# 6. Similarity matrices between streams, built from the event numbers of each
#    stream's output - not done for hlt1
if [[ $PROCESS != "hlt1" ]]; then
    echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
    for stream in "${STREAMS[@]}"; do
        echo "Stream name: ${stream}"
        time python -m MooreTests.read_event_numbers store_output_event_numbers \
            -p $PROCESS --stream-config $STREAM_CONFIG --stream $stream --file-type $OUTPUT_TYPE
        STORE_ERR_CODE
    done
    time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py inter_stream \
        --streams ${STREAMS[@]} -p $PROCESS --stream-config $STREAM_CONFIG
    STORE_ERR_CODE
else
    echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"'
fi

# Support legacy LHCb code (the one in 2024-patches); drop this when that branch ends.
# The probe exits non-zero when do_unpacking still takes a 'simulation' argument
# (or when the probe itself fails), in which case the legacy rates script is used.
python -c "from GaudiConf.reading import do_unpacking; import inspect, sys; exit('simulation' in inspect.signature(do_unpacking).parameters)"
oldCode=$?
if [ $oldCode -eq 0 ]; then
    rates_script=$PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py
else
    rates_script=$PRCONFIGROOT/python/MooreTests/legacy-line-and-stream-rates.py
fi

# 7. Rates per stream as well as per line (tables split by stream)
echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
for stream in "${STREAMS[@]}"; do
    echo "Stream name: ${stream}"
    python $rates_script -c $CONFIG_PATH -p $PROCESS -s $stream --stream-config $STREAM_CONFIG --file-type $OUTPUT_TYPE
    STORE_ERR_CODE
done

# 8. Intra-stream overlap between WGs (only really envisaged for HLT2 where streams != WGs)
if [[ $PROCESS == "hlt2" ]]; then
    echo 'Computing intra-stream WG overlaps in each production stream'
    for stream in "${STREAMS[@]}"; do
        echo "Stream name: ${stream}"
        time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py intra_stream \
            --stream $stream -p $PROCESS --stream-config $STREAM_CONFIG
        STORE_ERR_CODE
    done
fi

# 9. Merge all the output into tables
echo 'Combining all rate and bandwidth tables'
time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py \
    --process $PROCESS --stream-config $STREAM_CONFIG
STORE_ERR_CODE

# 10. Only after an hlt2 run: prepare what a later 'hlt2-output-locally' or
#     'hlt2-output-from-eos' sprucing job needs.
if [[ $PROCESS == "hlt2" ]]; then
    echo 'Generating yaml spruce input config to potentially use in a sprucing ["hlt2-output-locally", "hlt2-output-from-eos"] test'
    for STREAM in "${STREAMS[@]}"; do
        time python -m MooreTests.generate_spruce_input_configs \
            -c $CONFIG_PATH --stream-config $STREAM_CONFIG --stream $STREAM
        STORE_ERR_CODE
    done

    # Stage the file named by hlpr.tck() and each stream's output file in
    # $BASEDIR/to_eos/
    echo 'Copying MDF and manifest files to to_eos/'
    tck_path=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}', '${STREAM_CONFIG}'); print(hlpr.tck())")
    cp $tck_path $BASEDIR/to_eos/
    STORE_ERR_CODE
    for STREAM in "${STREAMS[@]}"; do
        stream_output_path=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}', '${STREAM_CONFIG}'); print(hlpr.mdfdst_fname_for_reading('${STREAM}') )")
        cp $stream_output_path $BASEDIR/to_eos/
        STORE_ERR_CODE
    done
fi

# 11. Put a copy of the stream config JSON into $BASEDIR/Output so the handler can
# pick it up for the html page. Bit of a hack.
cp ${STREAM_CONFIG_JSON_PATH[0]} ${BASEDIR}/Output/
STORE_ERR_CODE

# Hand the accumulated ERR_CODE back; one level higher it can be picked up to get
# the message out before making html
exit ${ERR_CODE}