Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Source: mstahl/PRConfig — Target: lhcb-datapkg/PRConfig
Commits on Source (29)
Showing with 492 additions and 213 deletions
......@@ -3,7 +3,7 @@
# Maintainer : Ben Couturier
#============================================================================
package PRConfig
version v1r64
version v1r65
#============================================================================
# Structure, i.e. directories to process.
......
......@@ -4,6 +4,59 @@
! Purpose : App Configuration for performance and regression tests
!-----------------------------------------------------------------------------
========================= PRConfig v1r65 2023-12-07 =========================
! 2023-12-05 - commit 2fd9c22
- Merge branch 'lugrazet-BW-hlt1-fix-loggingerror' into 'master'
[RTA BW Tests] quick fix for Hlt1 BW test. len(inputs) < 2 breaks logging
string
See merge request lhcb-datapkg/PRConfig!363
! 2023-12-05 - commit ee5e840
- Merge branch 'spruce-bw-input-Dec23' into 'master'
Add new Dec2023 samples for Sprucing PR tests
See merge request lhcb-datapkg/PRConfig!364
! 2023-11-29 - commit f4c873b
- Merge branch 'rjhunter-bwtest-cleanup-copies' into 'master'
[RTA BW tests] Small cleanups after !359
See merge request lhcb-datapkg/PRConfig!362
! 2023-11-28 - commit 6f6c963
- Merge branch 'rjhunter-reduce-moore-threads-in-bw-test' into 'master'
[Bandwidth tests] Use LBN_BUILD_JOBS to properly set n_threads on Moore in BW
test
See merge request lhcb-datapkg/PRConfig!356
! 2023-11-16 - commit 11208b4
- Merge branch 'rjhunter-chained-test-feasibility' into 'master'
[RTA BW tests] Test feasibility of copying HLT2 output to read into sprucing
test
See merge request lhcb-datapkg/PRConfig!359
! 2023-11-08 - commit ac9460c
- Merge branch 'lugrazet-BW-hlt1testpage-cleanup' into 'master'
[RTA BW Tests] BW test page clean-ups
See merge request lhcb-datapkg/PRConfig!355
========================= PRConfig v1r64 2023-11-01 =========================
! 2023-11-01 - commit f7f0f10
......
......@@ -67,8 +67,8 @@ def calculate_overlap_matrix(df):
for target_stream in df.columns:
for comparison_stream in df.columns:
cond_prob_per_stream[target_stream].append(
sum(df[comparison_stream] * df[target_stream]) / sum(
df[target_stream]))
sum(df[comparison_stream] * df[target_stream]) /
sum(df[target_stream]) if sum(df[target_stream]) > 0 else 0)
overlap_matrix_df = pd.DataFrame(
cond_prob_per_stream, columns=df.columns, index=df.columns)
return overlap_matrix_df
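For intuition, a minimal sketch of the guarded conditional-probability calculation above (the toy DataFrame and stream names are hypothetical; in the test, df holds one boolean column of event decisions per stream):

import pandas as pd

df = pd.DataFrame({'full': [1, 1, 0, 1], 'turbo': [1, 0, 0, 1]})  # toy decisions

cond_prob_per_stream = {stream: [] for stream in df.columns}
for target_stream in df.columns:
    for comparison_stream in df.columns:
        # P(comparison_stream | target_stream); 0 if the target stream is empty
        cond_prob_per_stream[target_stream].append(
            sum(df[comparison_stream] * df[target_stream]) /
            sum(df[target_stream]) if sum(df[target_stream]) > 0 else 0)

overlap_matrix_df = pd.DataFrame(
    cond_prob_per_stream, columns=df.columns, index=df.columns)
print(overlap_matrix_df)  # e.g. the 'turbo'-row, 'full'-column entry is 2/3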
......
#!/usr/bin/env python
###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import socket
import os
import atexit
import tempfile
import logging
from datetime import datetime
import shutil
from Moore.qmtest.context import download_mdf_inputs_locally
# Default cache dir is the current working directory as this is most convenient for the machine
# that the test runs on periodically. It assumes the working directory is not cleaned up often,
# and so the files remain available for subsequent jobs.
DEFAULT_CACHE_DIRS = {'default': '.'}
# prefer XDG_RUNTIME_DIR which should be on tmpfs
FALLBACK_CACHE_DIR = os.getenv('XDG_RUNTIME_DIR', tempfile.gettempdir())
FILE_TO_COPY = "mdf:root://eoslhcb.cern.ch//eos/lhcb/storage/lhcbpr/www/UpgradeRateTest/hlt2_bw_testing__production__full.mdf"
def default_cache_dirs():
hostname = socket.getfqdn()
dirs = DEFAULT_CACHE_DIRS.get(hostname, DEFAULT_CACHE_DIRS['default'])
return dirs
def main():
logging.basicConfig(
format='%(levelname)-7s %(message)s', level=logging.INFO)
cache_dir = default_cache_dirs()
if not os.path.isdir(cache_dir):
fallback_dir = tempfile.mkdtemp(
prefix='bandwidth-', dir=FALLBACK_CACHE_DIR)
logging.warning("default cache dir {!r} doesn't exist, using {}".format(
cache_dir, fallback_dir))
cache_dir = fallback_dir
# if we use the fallback directory, clean up after ourselves
atexit.register(shutil.rmtree, fallback_dir)
# Now download file
logging.info(f'Downloading input file {FILE_TO_COPY}')
# download_mdf_inputs_locally only downloads if files
# are not already available locally on the machine
logging.info(f'Downloading inputs for bandwidth job to {cache_dir}')
before_copy = datetime.now()
kB_to_GB = 1e3
downloaded_path = download_mdf_inputs_locally(
[FILE_TO_COPY], cache_dir, max_size=300 * kB_to_GB * 2e4
) # Guesses as to output size and n_events in the FULL stream TODO improve
logging.info(
f"Downloaded {downloaded_path}. This took: {datetime.now() - before_copy}"
)
if __name__ == "__main__":
main()
###############################################################################
# (c) Copyright 2000-2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
'''
Takes the config file describing the Hlt2 job's input file.
Generates a metadata yaml about the FULL stream output of the Hlt2 job.
This metadata can then be used as a config file for a 'latest' Spruce job's input file.
'''
import argparse
import json
import yaml
from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml
from PRConfig.TestFileDB import test_file_db
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'-c',
'--config',
type=str,
required=True,
help='Path to yaml config file defining the input.')
args = parser.parse_args()
fname_helper = FileNameHelper(process="hlt2")
config = parse_yaml(args.config)
fname_helper_args = {"stream_config": "production", "stream": "full"}
opts = {}
opts["input_files"] = [fname_helper.mdf_prwww_path(**fname_helper_args)]
opts["input_manifest_file"] = fname_helper.manifest_prwww_path(
fname_helper_args['stream_config'])
for key in ['input_raw_format', 'velo_radial_opening', 'nu']:
opts[key] = config[key]
opts['filtering_info_links'] = [
FileNameHelper.gitlab_config_webdir + "/" + args.config.split('/')[-1]
]
opts['input_type'] = "MDF"
opts['simulation'] = True
opts["data_type"] = "Upgrade"
conds = test_file_db[config['testfiledb_key']].qualifiers
opts["dddb_tag"] = conds['DDDB']
opts["conddb_tag"] = conds['CondDB']
ifile = fname_helper.event_no_fname(**fname_helper_args)
with open(ifile, 'r') as f:
# json = {stream: [evt_numbers]}
n_triggered_full = len(json.load(f)['full'])
n_hlt2_input = int(parse_yaml(fname_helper.input_nevts_json())['n_evts'])
input_rate = config['input_rate']
opts["input_rate"] = round((input_rate * n_triggered_full / n_hlt2_input),
4)
opts["n_evts"] = n_triggered_full
metadata_config_for_use_in_sprucing_latesthlt2_test = fname_helper.metadata_path(
**fname_helper_args)
with open(metadata_config_for_use_in_sprucing_latesthlt2_test, 'w') as f:
yaml.dump(opts, f, default_flow_style=False)
if __name__ == "__main__":
main()
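The input_rate written here is simply the HLT2 input rate scaled by the FULL-stream retention fraction. A worked example with hypothetical numbers (the real values come from the input config and the event-number JSON):

input_rate = 1000.0       # kHz; HLT2 input rate from the yaml config (hypothetical)
n_hlt2_input = 100000     # events the HLT2 job processed (hypothetical)
n_triggered_full = 12800  # event numbers found in the FULL-stream output (hypothetical)
full_stream_rate = round(input_rate * n_triggered_full / n_hlt2_input, 4)
print(full_stream_rate)   # 128.0 kHz, consistent with the ~128 kHz FULL-stream rate quoted below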
......@@ -21,9 +21,7 @@ from collections import Counter
import json
import argparse
import csv
import os
import yaml
from PRConfig.bandwidth_helpers import FileNameHelper
from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml
'''
When running production-stream config, returns:
......@@ -56,11 +54,6 @@ RAW_BANK_TYPES = [(i, LHCb.RawBank.typeName(i))
for i in range(LHCb.RawBank.LastType)]
def parse_yaml(file_path):
with open(os.path.expandvars(file_path), 'r') as f:
return yaml.safe_load(f)
def rawbank_sizes(rawevent, lst):
"""Return (name, size) for each raw bank type."""
if rawevent:
......@@ -231,13 +224,6 @@ def rates_per_stream(events, raw_size, dst_size, streamname, input_rate,
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Inspect Moore output')
parser.add_argument(
'-n',
'--events',
default=-1,
type=lambda x: int(round(float(x))),
help='nb of events to process',
required=True)
parser.add_argument(
'-c',
'--config',
......@@ -262,7 +248,7 @@ if __name__ == '__main__':
fname_helper = FileNameHelper(args.process)
n_events = args.events
n_events = int(parse_yaml(fname_helper.input_nevts_json())['n_evts'])
input_config = parse_yaml(args.config)
......@@ -325,18 +311,18 @@ if __name__ == '__main__':
]
appMgr = GP.AppMgr()
evt = appMgr.evtsvc()
i_rate = int(input_config['input_rate'])
evts_all, rawbanks_all, dst_all, event_stats, exclusive, raw, dst = processing_events_per_line_and_stream(
LHCbApp().EvtMax, lines, args.process)
# Calculate key quantities per stream
rates_per_stream(
evts_all, rawbanks_all, dst_all, args.stream,
input_config['input_rate'],
evts_all, rawbanks_all, dst_all, args.stream, i_rate,
fname_helper.tmp_rate_table_per_stream_path(args.stream_config,
args.stream))
# Calculate key quantities per line
rates_per_line(
event_stats, exclusive, raw, dst, input_config['input_rate'],
event_stats, exclusive, raw, dst, i_rate,
fname_helper.tmp_rate_table_per_line_path(args.stream_config,
args.stream))
......@@ -8,50 +8,27 @@
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
''' Extract all event numbers saved to MDF such that similarity between files can
be calculated later. Saves list of event numbers to json file.
'''
import argparse
import json
import GaudiPython as GP
from GaudiConf.reading import unpack_rawevent
from Configurables import (ApplicationMgr, LHCbApp, IODataManager,
EventSelector, createODIN)
from GaudiConf import IOHelper
import json
from PRConfig.bandwidth_helpers import FileNameHelper
from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml
def main():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'-n',
'--events',
type=lambda x: int(round(float(x))),
required=True,
help='Max number of events to process')
parser.add_argument(
'-p', '--process', type=str, required=True, choices=['hlt2', 'spruce'])
parser.add_argument(
'-c',
'--stream-config',
type=str,
required=True,
choices=["wg", "production"],
help='Name of the stream config')
parser.add_argument(
'-s', '--stream', type=str, required=True, help='Name of the stream')
args = parser.parse_args()
fname_helper = FileNameHelper(args.process)
def _extract_evt_numbers(ifiles, evtmax, isdigi):
"""
Extract all event numbers saved to a single MDF.
"""
# TODO this is a standard setup for the BW test analysis scripts. Share in a header.
LHCbApp(
DataType="Upgrade",
Simulation=True,
DDDBtag="dddb-20171126",
CondDBtag="sim-20171127-vc-md100",
EvtMax=args.events)
EvtMax=evtmax)
EventSelector(PrintFreq=10000)
IODataManager(DisablePFNWarning=True)
......@@ -59,8 +36,8 @@ def main():
unpack_rawevent(bank_types=['ODIN'], configurables=True),
createODIN(ODIN='myODIN')
])
IOHelper("MDF").inputFiles(
[fname_helper.mdf_fname_for_reading(args.stream_config, args.stream)])
IOHelper("ROOT").inputFiles(ifiles) if isdigi else IOHelper(
"MDF").inputFiles(ifiles)
appMgr = GP.AppMgr()
evt = appMgr.evtsvc()
......@@ -68,7 +45,7 @@ def main():
event_numbers = []
# Loop over all events
i_evt = 0
while i_evt < args.events:
while i_evt < evtmax:
# Iterate 1 event in file
appMgr.run(1)
......@@ -78,6 +55,52 @@ def main():
break # ran out of events in file
event_numbers.append(header.eventNumber())
i_evt += 1
return event_numbers
def input_nevts(input_files, evtmax, isdigi):
"""
For an arbitrary set of input_files, returns: min(total number of events, evtmax).
"""
event_numbers = _extract_evt_numbers(
ifiles=input_files, evtmax=evtmax, isdigi=isdigi)
n_evts = len(event_numbers)
print(f"Found {n_evts} event numbers in input_files")
return n_evts
def main():
"""
For a given stream's MDF output, finds all event_numbers and saves them in a json file for later use.
Useful for similarity between streams later.
"""
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument(
'-p',
'--process',
type=str,
required=True,
choices=['hlt2', 'spruce'],
help="Stage of the trigger.")
parser.add_argument(
'-sc',
'--stream-config',
type=str,
required=True,
choices=["wg", "production"],
help='Name of the stream config')
parser.add_argument(
'-s', '--stream', type=str, required=True, help='Name of the stream')
args = parser.parse_args()
fname_helper = FileNameHelper(args.process)
ifile = fname_helper.mdf_fname_for_reading(args.stream_config, args.stream)
evtmax = int(parse_yaml(fname_helper.input_nevts_json())['n_evts'])
event_numbers = _extract_evt_numbers(
ifiles=[ifile], evtmax=evtmax, isdigi=False)
ofile = fname_helper.event_no_fname(args.stream_config, args.stream)
with open(ofile, 'w') as f:
......
......@@ -38,7 +38,13 @@ HEAPTRACK_ARGS = [
def main(listOfLogs, hltlabel, throughput, produceYAML, ht_file, heaptrack,
perf_exe, no_inline):
llvm_cxxfilt_path = "/cvmfs/sft.cern.ch/lcg/contrib/clang/12/x86_64-centos7/bin/llvm-cxxfilt"
binary_tag = os.environ["BINARY_TAG"]
if "centos7" in binary_tag.split("-"):
llvm_cxxfilt_path = "/cvmfs/sft.cern.ch/lcg/contrib/clang/12/x86_64-centos7/bin/llvm-cxxfilt"
elif "el9" in binary_tag.split("-"):
llvm_cxxfilt_path = "/cvmfs/sft.cern.ch/lcg/releases/clang/14.0.6-14bdb/x86_64-centos9/bin/llvm-cxxfilt"
else:
raise RuntimeError(f"{binary_tag=} not supported")
if os.path.isfile(llvm_cxxfilt_path):
demangle = llvm_cxxfilt_path
else:
......
......@@ -26,7 +26,10 @@ import socket
import tempfile
import atexit
import shutil
import yaml
import json
from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml
from MooreTests.list_event_numbers import input_nevts
from datetime import datetime
# Default cache dir is the current working directory as this is most convenient for the machine
# that the test runs on periodically. It assumes the working directory is not cleaned up often,
......@@ -36,9 +39,6 @@ DEFAULT_CACHE_DIRS = {'default': ['.']}
# prefer XDG_RUNTIME_DIR which should be on tmpfs
FALLBACK_CACHE_DIR = os.getenv('XDG_RUNTIME_DIR', tempfile.gettempdir())
# Limit size of output log if many options files
MAX_NFILES_TO_PRINT_TO_LOG = 10
def default_cache_dirs():
hostname = socket.getfqdn()
......@@ -50,16 +50,23 @@ def is_remote(url):
return url.startswith('mdf:root:') or url.startswith('root:')
def parse_yaml(file_path):
with open(os.path.expandvars(file_path), 'r') as f:
return yaml.safe_load(f)
def dump_nevts(n_evts, process):
fname_helper = FileNameHelper(process)
ofile = fname_helper.input_nevts_json()
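# note: n_evts is serialised as a string; consumers cast it back with int(parse_yaml(...)["n_evts"])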
with open(ofile, 'w') as f:
json.dump({"n_evts": f"{n_evts}"}, f)
return 0
def run_gaudi_job(args, config, job_input):
# Build command line
n_evts = input_nevts(
input_files=job_input, evtmax=args.evt_max, isdigi=args.digi)
dump_nevts(n_evts, args.process)
extra_options = [
f"n_threads = {args.threads}", f"n_event_slots = {args.evtSlots}",
f"evt_max = {args.events}",
f"evt_max = {n_evts}",
f"input_raw_format = {config['input_raw_format']}",
f"input_files = {job_input}"
]
......@@ -78,6 +85,11 @@ def run_gaudi_job(args, config, job_input):
if args.download_input_files:
extra_options += ["event_store = 'EvtStoreSvc'", "use_iosvc = True"]
if args.use_manifest:
extra_options += [
f'input_manifest_file = \'{config["input_manifest_file"]}\''
]
extra_options = [f"options.{opt_str}" for opt_str in extra_options]
extra_options.insert(0, "from Moore import options")
......@@ -117,10 +129,10 @@ if __name__ == '__main__':
'1 + # threads)')
parser.add_argument(
'-n',
'--events',
'--evt-max',
default=100,
type=lambda x: int(round(float(x))),
help='nb of events to process per job')
help='maximum nb of events to process per job')
parser.add_argument(
'-a',
'--avg_evt_size',
......@@ -142,13 +154,44 @@ if __name__ == '__main__':
help='Comma separated paths to directories, one per job, where the '
'input files will be cached (default is hostname dependent or '
'$XDG_RUNTIME_DIR).')
parser.add_argument(
'--digi',
default=False,
action='store_true',
help='Flag to download digi files as opposed to the default mdf files')
parser.add_argument(
'-um',
'--use-manifest',
action='store_true',
help=
"Flag to access and include config[input_manifest_file] as an extra option in the job."
)
parser.add_argument(
'--read-evt-max-from-config',
action='store_true',
help="Flag to replace args.evtmax with config[n_evts]")
parser.add_argument(
'-p',
'--process',
type=str,
help='Compute for Hlt1, Hlt2 or Sprucing lines',
choices=['hlt1', 'hlt2', 'spruce'],
required=True)
args = parser.parse_args()
logging.basicConfig(
format='%(levelname)-7s %(message)s',
level=(logging.DEBUG if args.debug else logging.INFO))
if args.events == -1 or args.events > 1e5:
if args.read_evt_max_from_config:
if args.process != "spruce":
raise RuntimeError(
'read_evt_max_from_config only makes sense for Sprucing jobs whose config is the metadata generated from an Hlt2 BW job output.'
)
config = parse_yaml(args.config)
args.evt_max = int(config['n_evts'])
if args.evt_max == -1 or args.evt_max > 1e5:
raise RuntimeError(
"The BW tests are limited to 1e5 events to keep them to a reasonable runtime. Please re-configure"
)
......@@ -158,11 +201,18 @@ if __name__ == '__main__':
config = parse_yaml(args.config)
if "testfiledb_key" in config.keys():
if args.use_manifest and "input_manifest_file" not in config.keys():
raise KeyError(
f'{args.config} does not provide "input_manifest_file" but --use-manifest is in use.'
)
# Always use config['input_files'] for inputs if available.
# Otherwise, use config['testfiledb_key'] for inputs.
if "input_files" in config.keys():
inputs_fns = config["input_files"]
elif "testfiledb_key" in config.keys():
from PRConfig.TestFileDB import test_file_db
inputs_fns = test_file_db[config['testfiledb_key']].filenames
elif "input_files" in config.keys():
inputs_fns = config["input_files"]
else:
raise KeyError(
f'{args.config} does not provide either the "testfiledb_key" or "input_files".'
......@@ -170,7 +220,6 @@ if __name__ == '__main__':
job_inputs = [
inputs_fns
] # This is a list to allow for possible NUMA extension: see discussion on !316.
logging.info(inputs_fns[:MAX_NFILES_TO_PRINT_TO_LOG])
# Set up local directories where inputs are cached
if args.download_input_files:
......@@ -190,22 +239,26 @@ if __name__ == '__main__':
# Now download files
for i, inputs in enumerate(job_inputs):
logging.info(
f'Downloading input files {inputs[:MAX_NFILES_TO_PRINT_TO_LOG]}'
)
if all(is_remote(url) for url in inputs):
from Moore.qmtest.context import download_mdf_inputs_locally
# download_mdf_inputs_locally only downloads if files
from Moore.qmtest.context import download_mdf_inputs_locally, download_digi_inputs_locally
# download_inputs_locally only downloads if files
# are not already available locally on the machine
before_copy = datetime.now()
logging.info(
f'Downloading inputs for bandwidth job to {args.cache_dirs[i]}'
)
logging.info(
'Downloading inputs for bandwidth job to {}'.format(
args.cache_dirs[i]))
f'There are {len(inputs)} input files: [{inputs[0]} ' +
(']' if len(inputs) < 2 else f'{inputs[1]}, ... ]'))
kB_to_GB = 1e3
job_inputs[i] = download_mdf_inputs_locally(
download_inputs_locally = download_digi_inputs_locally if args.digi else download_mdf_inputs_locally
job_inputs[i] = download_inputs_locally(
inputs,
args.cache_dirs[i],
max_size=args.avg_evt_size * kB_to_GB * args.events)
logging.info(inputs)
max_size=args.avg_evt_size * kB_to_GB * args.evt_max)
logging.info(
f"Finished file downloads. This took: {datetime.now() - before_copy}"
)
elif any(is_remote(url) for url in inputs_fns):
parser.error('inputs must either be all xrootd or all local')
else:
......
......@@ -59,6 +59,8 @@ import subprocess
import tempfile
from itertools import cycle, islice
IS_EL9 = '-el9-' in os.environ["BINARY_TAG"]
def rep(x, length):
return list(islice(cycle(x), length))
......@@ -220,8 +222,12 @@ options.preamble_algs = [
if args.profile:
# see module docstring for more info about perf
perf_exe = "perf_libunwind_lcg101"
if not has_command(["perf_libunwind_lcg101"]):
if IS_EL9:
perf_exe = "perf"
else:
perf_exe = "perf_libunwind_lcg101"
if not has_command([perf_exe]):
from Moore.qmtest.context import xrdcp
from pathlib import Path
import stat
......
......@@ -14,7 +14,11 @@ import subprocess
from collections import defaultdict
def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500):
def get_access_urls_mc(bkkpath,
evttype,
filetypes,
sites_to_remove=[],
max_files=500):
customEnv = {}
# set custom grid proxy path if exists
......@@ -66,17 +70,22 @@ def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500):
# Get the first URL (if more than one) for each LFN, while skipping
# LFNs for which we couldn't find an URL (e.g. a site was down).
lfns = [urls[lfn][0] for lfn in file_list if lfn in urls]
lfns_tmp = [urls[lfn][0] for lfn in file_list if lfn in urls]
# Filter out some failing grid sites/files from the list
excluded = ['stfc.ac.uk']
return [lfn for site in excluded for lfn in lfns if site not in lfn]
excluded = ['stfc.ac.uk'] + sites_to_remove
lfns = [
lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)
]
lfns = sorted(lfns, key=lambda lfn: "eoslhcb.cern.ch" not in lfn)
return lfns
# TODO warn if some of the first N files was not resolved to a URL
# since then one would get numerically different results.
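A toy illustration of the site filtering and CERN-first ordering now shared by both functions (all URLs below are made up):

lfns_tmp = [
    'root://xrootd.echo.stfc.ac.uk//lhcb/MC/file1.digi',
    'root://eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/file2.digi',
    'root://xrootd.grid.surfsara.nl//lhcb/MC/file3.digi',
]
excluded = ['stfc.ac.uk']  # plus any caller-supplied sites_to_remove
lfns = [lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)]
lfns = sorted(lfns, key=lambda lfn: "eoslhcb.cern.ch" not in lfn)  # False sorts first
print(lfns)  # eoslhcb.cern.ch replica first; the stfc.ac.uk one is dropped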
def get_access_urls_data(bkkpath, max_files=500):
def get_access_urls_data(bkkpath, sites_to_remove=[], max_files=500):
customEnv = {}
# set custom grid proxy path if exists
......@@ -102,9 +111,6 @@ def get_access_urls_data(bkkpath, max_files=500):
# ensure files are always in same order
file_list.sort()
if any(file.endswith("raw") for file in file_list):
raise NotImplementedError(
"File ending with 'raw' found, please write a script manually")
print(
"#### Checking output of . /cvmfs/lhcb.cern.ch/lib/LbEnv --quiet; lb-dirac dirac-dms-lfn-accessURL --Terminal"
......@@ -134,12 +140,16 @@ def get_access_urls_data(bkkpath, max_files=500):
# Get the first URL (if more than one) for each LFN, while skipping
# LFNs for which we couldn't find an URL (e.g. a site was down).
lfns = [urls[lfn][0] for lfn in file_list if lfn in urls]
lfns_tmp = [urls[lfn][0] for lfn in file_list if lfn in urls]
# Filter out some failing grid sites/files from the list
excluded = ['stfc.ac.uk']
lfns = [lfn for site in excluded for lfn in lfns if site not in lfn]
excluded = ['stfc.ac.uk'] + sites_to_remove
lfns = [
lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)
]
lfns = sorted(lfns, key=lambda lfn: "eoslhcb.cern.ch" not in lfn)
if any(file.endswith("raw") for file in file_list):
lfns = [lfn for lfn in lfns if "eoslhcb.cern.ch" in lfn]
return lfns
......
......@@ -13482,4 +13482,106 @@ testfiles(
"CondDB": "master",
},
comment='Real data selected by HLT1 from run 269939.',
test_file_db=test_file_db,
)
testfiles(
myname='Upgrade_Bu2TauNu_Tau2PiPiPiNu_HeavyFlavour_MCHit_Filtered',
filenames=[
"root://eoslhcb.cern.ch//eos/lhcb/wg/rta/samples/mc/Upgrade_Bu2TauNu/b2taunu_tau2pipipinu_hf_mchit_filtered_magdown.xdigi"
],
qualifiers={
'Author': 'Maarten van Veghel',
'DataType': 'Upgrade',
'Format': 'XDIGI',
'Date': '2023-08-27',
'Simulation': True,
'DDDB': 'dddb-20210617',
'CondDB': 'sim-20210617-vc-md100',
'GeometryVersion': 'run3/trunk',
'ConditionsVersion': 'jonrob/all-pmts-active',
},
comment=
'Upgrade MC Bu2TauNu, Tau2PiPiPiNu; filtered on having MCHit (in VELO) from Bu or Tau, for heavy-flavour track reco tests',
test_file_db=test_file_db)
testfiles(
myname='exp_24_minbias_Sim10c_magdown',
filenames=[
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00001476_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00001481_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00003710_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00003721_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00005093_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00005160_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00005219_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00006206_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00006666_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00006855_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00006995_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00007281_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00007962_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00008009_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00008538_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00009297_1.digi",
"root://gridproxy@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00204940/0000/00204940_00009449_1.digi",
],
qualifiers={
'Author': 'Luke Grazette',
'Format': 'DIGI',
'DataType': 'Upgrade',
'Date': '2023-11-27',
'Simulation': True,
"GeometryVersion": "run3/trunk",
"ConditionsVersion": "master",
"DDDB": "dddb-20231017",
"CondDB": "sim-20231017-vc-mu100",
},
comment='\n'.join([
"Small amount of exp.24 minbias MC. See bkk for ~30M evts/polarity",
"bkk: /MC/Dev/Beam6800GeV-expected-2024-MagDown-Nu7.6-25ns-Pythia8/Sim10c/30000000/DIGI",
"request: https://gitlab.cern.ch/lhcb-rta/mc-requests/-/issues/15",
"exp24: https://gitlab.cern.ch/lhcb/opg/-/issues/28#note_7278057"
]),
test_file_db=test_file_db)
testfiles(
myname='upgrade-minbias-hlt2-full-output-Dec2023',
filenames=[
"mdf:root://eoslhcb.cern.ch//eos/lhcb/wg/dpa/wp1/hlt2_full_stream_Dec2023/MagDown/hlt2_full_stream_{0}.mdf"
.format(i) for i in list(range(35)) + list(range(36, 240))
] + [
"mdf:root://eoslhcb.cern.ch//eos/lhcb/wg/dpa/wp1/hlt2_full_stream_Dec2023/MagUp/hlt2_full_stream_{0}.mdf"
.format(i) for i in list(range(117)) + list(range(118, 135))
],
qualifiers={
'Author': 'Shunan Zhang',
'Format': 'MDF',
'DataType': 'Upgrade',
'Date': '2023-12-01',
'Simulation': True,
"CondDB": "sim-20171127-vc-md100",
"GeometryVersion": "run3/trunk",
"ConditionsVersion": "master",
"DDDB": "dddb-20171126",
},
comment=
'Hlt2 Full stream output as of December 2023, used as inputs for Sprucing tests, HLT2 output rate around 128 kHz',
test_file_db=test_file_db)
testfiles(
'expected-2024_B2JpsiK_ee_MD', [
'root://x509up_u137380@eoslhcb.cern.ch//eos/lhcb/grid/prod/lhcb/MC/Dev/DIGI/00205665/0000/00205665_00000051_1.digi'
], {
'Author': 'Albert Lopez',
'Format': 'DIGI',
'DataType': 'Upgrade',
'Simulation': True,
'Date': '2023-12-13',
'DDDB': 'dddb-20231017',
'CondDB': 'sim-20231017-vc-md100',
"GeometryVersion": "run3/trunk",
"ConditionsVersion": "master",
},
'Test sample of B2JpsiK with Jpsi2ee for electron run 3 studies, MagDown, Sim10c',
test_file_db=test_file_db)
......@@ -10,6 +10,12 @@
# or submit itself to any jurisdiction. #
###############################################################################
import os
import yaml
def parse_yaml(file_path):
with open(os.path.expandvars(file_path), 'r') as f:
return yaml.safe_load(f)
class FileNameHelper(object):
......@@ -17,6 +23,7 @@ class FileNameHelper(object):
mdf_subdir = 'MDF'
output_subdir = 'Output'
inter_subsubdir = 'Inter'
gitlab_config_webdir = "https://gitlab.cern.ch/lhcb/Moore/-/blob/master/Hlt/Hlt2Conf/tests/options/bandwidth"
def __init__(self, process):
self.process = process
......@@ -28,11 +35,16 @@ class FileNameHelper(object):
def _file_pfx(self):
return f"{self.process}_bw_testing"
def _incomplete_mdf_fname(self, stream_config):
return os.path.join(
self.base_dir, self.mdf_subdir,
self._join(self._file_pfx(), stream_config, "{stream_bit}") +
".mdf")
def _incomplete_mdf_fname(self, stream_config, full_path=True):
fname = self._join(self._file_pfx(), stream_config,
"{stream_bit}") + ".mdf"
return os.path.join(self.base_dir, self.mdf_subdir,
fname) if full_path else fname
def _prwww_path(self, fname, starts_mdf):
lhcbpr_www_dir = "root://eoslhcb.cern.ch//eos/lhcb/storage/lhcbpr/www/UpgradeRateTest/current_hlt2_output"
baseurl = f"mdf:{lhcbpr_www_dir}" if starts_mdf else lhcbpr_www_dir
return os.path.join(baseurl, fname)
def make_tmp_dirs(self):
mdf_dir = os.path.join(self.base_dir, self.mdf_subdir)
......@@ -44,9 +56,9 @@ class FileNameHelper(object):
return self._incomplete_mdf_fname(stream_config).format(
stream_bit="{stream}")
def mdf_fname_for_reading(self, stream_config, stream):
return self._incomplete_mdf_fname(stream_config).format(
stream_bit=stream)
def mdf_fname_for_reading(self, stream_config, stream, full_path=True):
return self._incomplete_mdf_fname(
stream_config, full_path=full_path).format(stream_bit=stream)
def get_stream_from_bw_path(self, bw_file_path):
# useful for globbing
......@@ -54,10 +66,10 @@ class FileNameHelper(object):
stream_and_ext_bit = bw_file_path.split("__")[-1]
return stream_and_ext_bit.split('.')[0]
def tck(self, stream_config):
return os.path.join(
self.base_dir, self.mdf_subdir,
self._join(self._file_pfx(), stream_config) + ".tck.json")
def tck(self, stream_config, full_path=True):
fname = self._join(self._file_pfx(), stream_config) + ".tck.json"
return os.path.join(self.base_dir, self.mdf_subdir,
fname) if full_path else fname
def stream_config_json_path(self, stream_config, full_path=True):
fname = self._join(self._file_pfx(), "streaming",
......@@ -65,6 +77,31 @@ class FileNameHelper(object):
return os.path.join(self.base_dir, self.output_subdir,
fname) if full_path else fname
def metadata_path(self, stream_config, stream, full_path=True):
fname = self._join(self._file_pfx(), stream_config, stream,
"metadata") + ".yaml"
return os.path.join(self.base_dir, self.mdf_subdir,
fname) if full_path else fname
def mdf_prwww_path(self, stream_config, stream):
return self._prwww_path(
fname=self.mdf_fname_for_reading(
stream_config, stream, full_path=False),
starts_mdf=True)
def manifest_prwww_path(self, stream_config):
return self._prwww_path(
fname=self.tck(stream_config, full_path=False), starts_mdf=False)
def metadata_prwww_path(self, stream_config, stream):
return self._prwww_path(
fname=self.metadata_path(stream_config, stream, full_path=False),
starts_mdf=False)
def input_nevts_json(self):
return os.path.join(self.base_dir, self.mdf_subdir,
self._join(self._file_pfx(), "n_evts")) + ".json"
def line_descr_path(self, full_path=True):
fname = self._join(self.process, "line_descriptives") + ".html"
return os.path.join(self.base_dir, self.output_subdir,
......
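As a sanity check of the new helpers, a hypothetical composition (assuming the double-underscore join convention visible in the filenames elsewhere in this diff):

hlpr = FileNameHelper(process='hlt2')
print(hlpr.mdf_prwww_path(stream_config='production', stream='full'))
# mdf:root://eoslhcb.cern.ch//eos/lhcb/storage/lhcbpr/www/UpgradeRateTest/current_hlt2_output/hlt2_bw_testing__production__full.mdf
print(hlpr.metadata_prwww_path(stream_config='production', stream='full'))
# same directory, hlt2_bw_testing__production__full__metadata.yaml, no mdf: prefix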
#!/bin/bash
###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2023-2024 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -22,8 +22,8 @@ Usage: Moore/run /path/to/Moore_bandwidth_test.sh [options] 2>&1 | tee <path-to-
Expected to be called by e.g. Moore_hlt2_bandwidth.sh for the periodic LHCbPR tests.
--process: "hlt1", "hlt2" or "spruce".
--input-data: "nominal" or "2023".
"2023" not currently available for process == spruce or hlt1.
--input-data: "nominal" or "latest".
"latest" not currently available for process == hlt1 or hlt2.
-h|--help: print this message and exit.
EOF
......@@ -97,19 +97,18 @@ mkdir -p tmp/Output
mkdir -p tmp/Output/Inter
# Set configuration variables and check configuration makes sense
# TODO: Remove Process Dependence on N_EVTS, see: https://gitlab.cern.ch/lhcb-datapkg/PRConfig/-/issues/12
EVTMAX=1e5
case $PROCESS in
hlt1)
MOORE_THREADS=1
TEST_PATH_PREFIX='$HLT1CONFROOT/tests/options/bandwidth/'
EVENT_SIZE_UPPER_LIMIT=200
EVENT_SIZE_UPPER_LIMIT=400
GAUDIRUN_INPUT_PROCESS="Hlt1"
STREAM_CONFIGS=( "streamless" )
case $INPUTDATA in
nominal)
N_EVTS=1e4
CONFIG_FILE="${TEST_PATH_PREFIX}hlt1_bandwidth_input.yaml"
EXTRA_OPTS="-e 1 $MOOREROOT/options/muon_geometry_v2.py" #Requires #EvtSlots==1 due to singlethreading on DIGIs.
EXTRA_OPTS="-e 1 --digi" #Requires #EvtSlots==1 due to singlethreading on DIGIs.
;;
*)
echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
......@@ -118,8 +117,7 @@ case $PROCESS in
esac
;;
hlt2)
N_EVTS=1e5
MOORE_THREADS=$(nproc)
MOORE_THREADS=${LBN_BUILD_JOBS:-1} # Default to single-threaded
TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
EVENT_SIZE_UPPER_LIMIT=200
GAUDIRUN_INPUT_PROCESS="Hlt2"
......@@ -129,19 +127,14 @@ case $PROCESS in
CONFIG_FILE="${TEST_PATH_PREFIX}hlt2_bandwidth_input_nominal_with_gec.yaml"
EXTRA_OPTS='$MOOREROOT/options/calo_decoding_packed.py $MOOREROOT/options/muon_geometry_v2.py'
;;
2023)
CONFIG_FILE="${TEST_PATH_PREFIX}hlt2_bandwidth_input_2023.yaml"
EXTRA_OPTS="${TEST_PATH_PREFIX}hlt2_bandwidth_input_2023_extra_opts.py"
;;
*)
echo "ERROR: --input-data must be \"nominal\" or \"2023\" for process \"$PROCESS\""
echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
exit 1
;;
esac
;;
spruce)
N_EVTS=1e5
MOORE_THREADS=$(nproc)
MOORE_THREADS=${LBN_BUILD_JOBS:-1} # Default to single-threaded
TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
EVENT_SIZE_UPPER_LIMIT=300
GAUDIRUN_INPUT_PROCESS="Spruce"
......@@ -149,10 +142,16 @@ case $PROCESS in
case $INPUTDATA in
nominal)
CONFIG_FILE="${TEST_PATH_PREFIX}spruce_bandwidth_input.yaml"
EXTRA_OPTS=''
EXTRA_OPTS='$MOOREROOT/options/muon_geometry_v2.py'
;;
latest)
# "latest" corresponds to using the uploaded full-stream output from a "process=hlt2, input-data=nominal" test.
# These files are overwritten during "lhcb-master" builds of "process=hlt2, input-data=nominal", i.e. ~daily.
CONFIG_FILE="tmp/hlt2_bw_testing__production__full__metadata.yaml"
EXTRA_OPTS='-um --read-evt-max-from-config $MOOREROOT/options/muon_geometry_v2.py'
;;
*)
echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
echo "ERROR: --input-data must be \"nominal\" or \"latest\" for process \"$PROCESS\""
exit 1
;;
esac
......@@ -167,15 +166,20 @@ esac
# 1. Run Moore.
# -d downloads the input files locally for speed-up running Moore. Not helpful unless that download is fast for you (e.g. you're at CERN)
if [ $PROCESS = "spruce" ] && [ $INPUTDATA = "latest" ]; then
echo "Downloading the Hlt2 output metadata to use as input config."
DOWNLOAD_INPUT_CONFIG_LOCATION=(`python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('hlt2'); print( hlpr.metadata_prwww_path(stream_config='production', stream='full') )"`)
xrdcp -f $DOWNLOAD_INPUT_CONFIG_LOCATION $CONFIG_FILE
STORE_ERR_CODE
fi
for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do
echo "Running trigger to obtain MDF files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_FILE}"
time python -m MooreTests.run_bandwidth_test_jobs -d -c=$CONFIG_FILE -n=$N_EVTS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py"
time python -m MooreTests.run_bandwidth_test_jobs -d -c=$CONFIG_FILE -n=$EVTMAX -p=$PROCESS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py"
STORE_ERR_CODE
done
# 2. Compute line descriptives: persist reco, extra output
if [ $PROCESS = "hlt1" ]
then
if [ $PROCESS = "hlt1" ]; then
echo 'Skipping line descriptives as $PROCESS = "hlt1"'
else
echo 'Obtaining line descriptives'
......@@ -191,14 +195,13 @@ for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do
echo "Found ${STREAM_CONFIG} streams: ${STREAMS[@]}"
# 4. Compute similarity matrices between streams by comparing event numbers
if [ $PROCESS = "hlt1" ]
then
if [ $PROCESS = "hlt1" ]; then
echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"'
else
echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
for stream in "${STREAMS[@]}"; do
echo "Stream name: ${stream}"
time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS -n $N_EVTS --stream-config $STREAM_CONFIG --stream $stream
time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS --stream-config $STREAM_CONFIG --stream $stream
STORE_ERR_CODE
done
time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]}
......@@ -209,7 +212,7 @@ for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do
echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
for stream in "${STREAMS[@]}"; do
echo "Stream name: ${stream}"
time python $PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py -c $CONFIG_FILE -n $N_EVTS -p $PROCESS -s $stream --stream-config $STREAM_CONFIG
time python $PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py -c $CONFIG_FILE -p $PROCESS -s $stream --stream-config $STREAM_CONFIG
STORE_ERR_CODE
done
done
......@@ -219,14 +222,13 @@ echo 'Combining all rate and bandwidth tables'
time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py --process $PROCESS
STORE_ERR_CODE
# 7. Test on the feasibility of a chained HLT2->Sprucing test
if [ $PROCESS = "spruce" ] && [ $INPUTDATA = "nominal" ]
then
echo 'Testing downloads of Hlt2 output for the future'
time python -m MooreTests.download_hlt2_output
# 7. Required information for 'latest' sprucing jobs.
if [ $PROCESS = "hlt2" ] && [ $INPUTDATA = "nominal" ]; then
echo 'Generating yaml metadata to upload to eos for Moore_spruce_latest_bandwidth test'
time python -m MooreTests.generate_hlt2_fullstream_metadata -c $CONFIG_FILE
STORE_ERR_CODE
fi
STORE_ERR_CODE
# 8. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
echo 'Making plots and HTML pages'
time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE
\ No newline at end of file
time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE
......@@ -22,9 +22,9 @@ fi
export THOR_JIT_N_SPLITS=8
export THOR_JIT_N_JOBS=8
python -m MooreTests.run_throughput_jobs -n=2e4 --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Aug2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
python -m MooreTests.run_throughput_jobs -n=2e4 --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Dec2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
python -m MooreTests.run_throughput_jobs -n=-1 -j 1 --profile --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Aug2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
python -m MooreTests.run_throughput_jobs -n=-1 -j 1 --profile --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Dec2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
# force 0 return code so the handler runs even for failed jobs
exit 0
#!/bin/bash
###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2022-2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -14,7 +14,7 @@
# this path ends up printed on the BW test page; export so it can be picked up in the child process
export SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/$(basename "$0")"
$PRCONFIGROOT/scripts/benchmark-scripts/Moore_bandwidth_test.sh --process hlt2 --input-data 2023
$PRCONFIGROOT/scripts/benchmark-scripts/Moore_bandwidth_test.sh --process spruce --input-data latest
# force 0 return code so the handler runs even for failed jobs
exit 0
exit 0
\ No newline at end of file