# Moore_bandwidth_test.sh

#!/bin/bash
###############################################################################
# (c) Copyright 2023-2024 CERN for the benefit of the LHCb Collaboration      #
#                                                                             #
# This software is distributed under the terms of the GNU General Public      #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING".   #
#                                                                             #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization  #
# or submit itself to any jurisdiction.                                       #
###############################################################################

# Help text printed for -h/--help and when the script is called without arguments.
# The heredoc delimiter is quoted so the text is taken literally (no expansions).
usage="$(cat <<-'END_OF_USAGE'
Wrapper script around jobs to run Moore (Hlt2, Sprucing) with all lines over a large sample,
and then analyse the results to calculate rates, bandwidths and overlaps etc.

Writes out to tmp/Output/

Usage: Moore/run /path/to/Moore_bandwidth_test.sh [options] 2>&1 | tee <path-to-output.log>
       to collect all output as a log file.

       Expected to be called by e.g. Moore_hlt2_bandwidth.sh for the periodic LHCbPR tests.

       The number of events the tests run over is hard-coded below, but this can be modified by setting the environment variable OVERRIDE_EVTMAX.
       This is useful to e.g. run from a released version, where one cannot easily edit the hard-coded number of events below.
       For example: Moore/run env OVERRIDE_EVTMAX=1e4 /path/to/Moore_bandwidth_test.sh ...
--process: "hlt1", "hlt2" or "spruce".
--input-data: "nominal", "hlt2-output-locally" or "hlt2-output-from-eos".
    "hlt2-output-from-eos" and "hlt2-output-locally" are not currently available for process == hlt1 or hlt2.
--stream-config: name of streaming configuration to use in the job e.g. 'production' for hlt2, or 'wg', 'wgpass' or 'turcal' for spruce.
-h|--help: print this message and exit.

END_OF_USAGE
)"

# Validate an "--option value" pair and export the value as an environment variable.
# Arguments:
#   $1 - name of the variable to export
#   $2 - the option as given on the command line (only used in error messages)
#   $3 - the value supplied for that option
# Outputs: an "ERROR: ..." line on stdout when the pair is rejected
# Returns: 0 after exporting; 1 if the value is missing or looks like another option
function parse_value_and_export() {

    if [ $# -ne 3 ]; then
        # Name the offending option ($2) rather than the script path ($0).
        echo "ERROR: Must provide the argument as ${2:-<option>} <value>"
        return 1
    fi

    # A value that starts with "--" means the next option was swallowed as the value.
    # Only the prefix is checked, so values that merely contain "--" are accepted.
    if [[ "$3" == --* ]]; then
        echo 'ERROR: Invalid arguments "'"$2 $3"'"'
        return 1
    fi

    # Quoted so the value is exported verbatim (no word-splitting or globbing).
    export "$1=$3"
}

# Running sum of the exit statuses of all recorded steps.
ERR_CODE=0

# Add the exit status of the command that ran immediately before this call to ERR_CODE.
# It reads $? on entry, so it must be invoked directly after the command of interest.
STORE_ERR_CODE() {
    local previous_status=$?
    ERR_CODE=$(( ERR_CODE + previous_status ))
}

# if no arguments are provided print the usage and exit
if [ $# -eq 0 ]; then
    echo "$usage"
    exit 0
fi

# parse arguments
# Each value-taking option passes at most two words ("--option" and its value) to
# parse_value_and_export; "${@:1:2}" keeps them as separate, unsplit words and passes
# only one word when the value is missing, so the helper can detect that case.
# A rejected option/value pair aborts straight away instead of leaving the variable
# unset and failing later with an unrelated message.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            echo "$usage"
            exit 0
            ;;
        --process)
            parse_value_and_export PROCESS "${@:1:2}" || exit 1
            shift 2 # option and its value
            ;;
        --input-data)
            parse_value_and_export INPUTDATA "${@:1:2}" || exit 1
            shift 2 # option and its value
            ;;
        --stream-config)
            parse_value_and_export STREAM_CONFIG "${@:1:2}" || exit 1
            shift 2 # option and its value
            ;;
        *)
            echo "ERROR: Unknown argument \"$1\""
            exit 1
            ;;
    esac
done

# check for empty configuration
if [ -z "$PROCESS" ]; then
    echo "ERROR: You must specify the process via the --process argument"
    exit 1
fi

if [ -z "$INPUTDATA" ]; then
    echo "ERROR: You must specify the input-data via the --input-data argument"
    exit 1
fi

# The stream configuration is used by every later step (job options file name, -sc / --stream-config
# arguments, stream config JSON lookup), so refuse to start without it.
if [ -z "$STREAM_CONFIG" ]; then
    echo "ERROR: You must specify the stream configuration via the --stream-config argument"
    exit 1
fi

# Set configuration variables and check configuration makes sense
# Number of events to run over: 1e5 unless OVERRIDE_EVTMAX is set to a non-empty value.
EVTMAX=${OVERRIDE_EVTMAX:-1e5}

# Per-process job settings. Each recognised process fixes MOORE_THREADS, TEST_PATH_PREFIX,
# EVENT_SIZE_UPPER_LIMIT, GAUDIRUN_INPUT_PROCESS, OUTPUT_TYPE and EXTRA_OPTS, and rejects
# --input-data values that are not supported for that process.
# TEST_PATH_PREFIX is single-quoted on purpose: the $...CONFROOT reference stays literal here.
if [[ "$PROCESS" == "hlt1" ]]; then
    if [[ "$INPUTDATA" != "nominal" ]]; then
        echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
        exit 1
    fi
    MOORE_THREADS=1 # Although they're mdfs, AllenViaMoore doesn't like to be multi-threaded
    TEST_PATH_PREFIX='$HLT1CONFROOT/tests/options/bandwidth/'
    EVENT_SIZE_UPPER_LIMIT=200
    GAUDIRUN_INPUT_PROCESS="Hlt1"
    OUTPUT_TYPE="MDF"
    EXTRA_OPTS="-e 1" # Goes with the single-threaded running noted for MOORE_THREADS

elif [[ "$PROCESS" == "hlt2" ]]; then
    if [[ "$INPUTDATA" != "nominal" ]]; then
        echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
        exit 1
    fi
    MOORE_THREADS=${LBN_BUILD_JOBS:-1} # Single-threaded unless LBN_BUILD_JOBS says otherwise
    TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
    EVENT_SIZE_UPPER_LIMIT=200
    GAUDIRUN_INPUT_PROCESS="Hlt2"
    OUTPUT_TYPE="MDF"
    EXTRA_OPTS=''

elif [[ "$PROCESS" == "spruce" ]]; then
    MOORE_THREADS=1 # Cant write to .dst multi-threaded
    TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
    EVENT_SIZE_UPPER_LIMIT=300
    GAUDIRUN_INPUT_PROCESS="Spruce"
    OUTPUT_TYPE="ROOT"
    if [[ "$INPUTDATA" == "nominal" ]]; then
        # Requires #EvtSlots==1 due to writing dsts, must be single threaded.
        EXTRA_OPTS='-e 1'
    elif [[ "$INPUTDATA" == "hlt2-output-locally" || "$INPUTDATA" == "hlt2-output-from-eos" ]]; then
        # "hlt2-output-locally": the locally-run full-stream output from a
        #   "process=hlt2, input-data=nominal" test.
        # "hlt2-output-from-eos": the uploaded full-stream output from such a test; those files
        #   are overwritten during "lhcb-master" builds of that test, i.e. ~daily.
        # NOTE(review): the original also carried a stray comment about a flag for a top-level
        # human-readable page linking the Hlt2 and Spruce output pages; no such flag is set
        # here - confirm whether one is still needed.
        EXTRA_OPTS='-e 1 -um'
    else
        echo "ERROR: --input-data must be \"nominal\", \"hlt2-output-locally\", \"hlt2-output-from-eos\" for process \"$PROCESS\""
        exit 1
    fi

else
    echo "Unrecognised process \"$PROCESS\". It must be \"hlt1\" or \"hlt2\" or \"spruce\"."
    exit 1
fi

### Now run the tests
# 0. Pre-Job initialising
# Picks the input config (CONFIG_PATH) for the job. For sprucing it also generates the
# TISTOS option file and appends its location to EXTRA_OPTS.
if [ "$PROCESS" = "hlt1" ]; then
    CONFIG_FILE="hlt1_bandwidth_input.yaml"
    CONFIG_PATH="${TEST_PATH_PREFIX}${CONFIG_FILE}"

elif [ "$PROCESS" = "hlt2" ]; then
    CONFIG_FILE="hlt2_bandwidth_input_2024.yaml"
    CONFIG_PATH="${TEST_PATH_PREFIX}${CONFIG_FILE}"

else ## Spruce
    # First sort out the map between spruce STREAM_CONFIG and the Hlt2 STREAM_CONFIG
    # Could simplify this by making the spruce STREAM_CONFIGS == full, turbo and turcal
    # A quoted case selector copes with an empty or multi-word STREAM_CONFIG without
    # the "unary operator expected" noise of an unquoted [ ... ] test.
    case "$STREAM_CONFIG" in
        wg)
            HLT2_STREAM_CONFIG="full"
            ;;
        wgpass)
            HLT2_STREAM_CONFIG="turbo"
            ;;
        turcal)
            HLT2_STREAM_CONFIG="turcal"
            ;;
        *)
            echo "ERROR: Unrecognised stream configuration \"$STREAM_CONFIG\". It must be \"wg\", \"wgpass\" or \"turcal\"."
            exit 1
            ;;
    esac
    LATEST_CONFIG_FILE="spruce_bandwidth_latest_input__${HLT2_STREAM_CONFIG}.yaml"
    STATIC_CONFIG_FILE="spruce_bandwidth_input.yaml"

    # Now can set the config file path
    if [ "$INPUTDATA" = "hlt2-output-from-eos" ]; then
        echo "Downloading ${LATEST_CONFIG_FILE} to use as input config."
        CONFIG_PATH="tmp/${LATEST_CONFIG_FILE}"
        PRWWW_PREFIX=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print( hlpr.current_hlt2_output_dir )")
        xrdcp -f "${PRWWW_PREFIX}/${LATEST_CONFIG_FILE}" "$CONFIG_PATH"
        STORE_ERR_CODE
    elif [ "$INPUTDATA" = "hlt2-output-locally" ]; then
        echo "Using ${LATEST_CONFIG_FILE} generated in previous job as input config."
        CONFIG_PATH="tmp/${LATEST_CONFIG_FILE}"
    else
        CONFIG_PATH="${TEST_PATH_PREFIX}${STATIC_CONFIG_FILE}"
    fi

    echo "Generating TISTOS option file"
    time python -m MooreTests.generate_tistos_option_file -c "$CONFIG_PATH" --stream-config "$STREAM_CONFIG"
    STORE_ERR_CODE
    TISTOS_OPTION_FILE_LOCATION=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print( hlpr.tistos_option_file('${STREAM_CONFIG}') ) ")
    # EXTRA_OPTS is a space-separated option string that is word-split when Moore is launched.
    EXTRA_OPTS+=" ${TISTOS_OPTION_FILE_LOCATION}"
fi

# 1. Run Moore.
# -d downloads the input files locally for speed-up running Moore. Not helpful unless that download is fast for you (e.g. you're at CERN)
echo "Running trigger to obtain MDF/DST files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_PATH}"
# EXTRA_OPTS is deliberately left unquoted: it holds zero or more space-separated options
# and must be word-split (and vanish entirely when empty). Everything else is quoted.
time python -m MooreTests.run_bandwidth_test_jobs \
    -d \
    -c="$CONFIG_PATH" \
    -sc="$STREAM_CONFIG" \
    -n="$EVTMAX" \
    -p="$PROCESS" \
    -t="$MOORE_THREADS" \
    -a="$EVENT_SIZE_UPPER_LIMIT" \
    $EXTRA_OPTS \
    "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py"
STORE_ERR_CODE

# 2. Work out how many events you ran over - needed for denominator of the rates
# Inputs always MDF files - generalise if ever needed
time python -m MooreTests.read_event_numbers count_input_events -p "$PROCESS" -sc "$STREAM_CONFIG" -n "$EVTMAX" --file-type "MDF"
STORE_ERR_CODE

# 3. Compute line descriptives: persist reco, extra output
# TODO: line_descriptives should use the lines from the streaming configuration
if [ "$PROCESS" = "hlt1" ]; then
    # Single quotes are deliberate: the message prints the literal text $PROCESS.
    echo 'Skipping line descriptives as $PROCESS = "hlt1"'
else
    echo 'Obtaining line descriptives'
    time python "$PRCONFIGROOT/python/MooreTests/line-descriptives.py" "${GAUDIRUN_INPUT_PROCESS}"
    STORE_ERR_CODE
fi

echo "Doing analysis for the ${STREAM_CONFIG} streaming configuration..."

# 3b. Work out what the streams are from the config JSON; needed for later steps
STREAM_CONFIG_JSON_PATH=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.stream_config_json_path('${STREAM_CONFIG}'))")
STORE_ERR_CODE # a failed lookup would otherwise go unrecorded
# The path is quoted so that an empty value makes jq fail on a missing file
# instead of silently reading from stdin.
STREAM_STR=$(jq -r 'keys | @sh' "${STREAM_CONFIG_JSON_PATH}")
STORE_ERR_CODE # a jq failure would otherwise just leave STREAMS empty
# jq's @sh output is a list of shell-quoted key names; declare -a parses it into an array.
declare -a STREAMS="($STREAM_STR)"
echo "Found ${STREAM_CONFIG} streams: ${STREAMS[*]}"

# 4. Extract filesizes of mdf/dst outputs + compress Hlt2 output.
echo 'Extract filesizes of mdf/dst outputs'
time python "$PRCONFIGROOT/python/MooreTests/extract_filesizes.py" -p "$PROCESS" --stream-config "$STREAM_CONFIG" --streams "${STREAMS[@]}"
STORE_ERR_CODE

# 5. Compute similarity matrices between streams by comparing event numbers
# Event numbers are stored per stream first; the inter-stream overlap is then computed once over all streams.
if [ "$PROCESS" = "hlt1" ]; then
    # Single quotes are deliberate: the message prints the literal text $PROCESS.
    echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"'
else
    echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
    for stream in "${STREAMS[@]}"; do
        echo "Stream name: ${stream}"
        time python -m MooreTests.read_event_numbers store_output_event_numbers -p "$PROCESS" --stream-config "$STREAM_CONFIG" --stream "$stream" --file-type "$OUTPUT_TYPE"
        STORE_ERR_CODE
    done
    # Quoted array expansion: each stream name is passed as exactly one argument.
    time python "$PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py" inter_stream --streams "${STREAMS[@]}" -p "$PROCESS" --stream-config "$STREAM_CONFIG"
    STORE_ERR_CODE
fi

# 6. Computing rates per stream as well as per line (tables split by stream)
echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
for stream in "${STREAMS[@]}"; do
    echo "Stream name: ${stream}"
    time python "$PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py" -c "$CONFIG_PATH" -p "$PROCESS" -s "$stream" --stream-config "$STREAM_CONFIG" --file-type "$OUTPUT_TYPE"
    STORE_ERR_CODE
done

# 7. Compute intra-stream overlap between WGs (only really envisaged for HLT2 where streams != WGs)
if [ "$PROCESS" = "hlt2" ]; then
    echo 'Computing intra-stream WG overlaps in each production stream'
    for stream in "${STREAMS[@]}"; do
        echo "Stream name: ${stream}"
        time python "$PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py" intra_stream --stream "$stream" -p "$PROCESS" --stream-config "$STREAM_CONFIG"
        STORE_ERR_CODE
    done
fi

# 8. Combine all output into tables
echo 'Combining all rate and bandwidth tables'
time python "$PRCONFIGROOT/python/MooreTests/combine_rate_output.py" --process "$PROCESS" --stream-config "$STREAM_CONFIG"
STORE_ERR_CODE

# 9. Required information for 'hlt2-output-locally' or 'hlt2-output-from-eos' sprucing jobs.
if [ "$PROCESS" = "hlt2" ] && [ "$INPUTDATA" = "nominal" ]; then
    echo 'Generating yaml spruce input config to potentially use in a sprucing ["hlt2-output-locally", "hlt2-output-from-eos"] test'
    for STREAM in "${STREAMS[@]}"; do
        time python -m MooreTests.generate_spruce_input_configs -c "$CONFIG_PATH" --stream-config "$STREAM_CONFIG" --stream "$STREAM"
        STORE_ERR_CODE
    done

    # Also copy the mdf and manifest files to eos
    echo 'Copying MDF and manifest files to to_eos/'
    # Each helper call is assumed to print a single path (TODO confirm); it is passed to cp as one quoted argument.
    tck_path=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.tck('${STREAM_CONFIG}') )")
    cp "$tck_path" tmp/to_eos/
    STORE_ERR_CODE
    for STREAM in "${STREAMS[@]}"; do
        mdfdst_path=$(python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.mdfdst_fname_for_reading('${STREAM_CONFIG}', '${STREAM}') )")
        cp "$mdfdst_path" tmp/to_eos/
        STORE_ERR_CODE
    done
fi

# 10. Copy the stream config JSON from tmp/MDF -> tmp/Output so the handler can pick it up. Bit of a hack
# Needed so it can be put in the html page
cp "$STREAM_CONFIG_JSON_PATH" tmp/Output/
STORE_ERR_CODE

# Return the ERR_CODE, can be picked up one level higher to get the message out before making html
# ERR_CODE is a sum of exit statuses, while the shell keeps only the low 8 bits of an exit
# status. Without the cap a total that is a multiple of 256 would be reported as success (0).
if (( ERR_CODE > 255 )); then
    ERR_CODE=255
fi
exit "$ERR_CODE"