Add dumb script to download (and time the download of) a HLT2 output

63bdc493 · Ross John Hunter · ac9460ce · 63bdc493
Commit 63bdc493 authored 1 year ago by Ross John Hunter
--- a/python/MooreTests/download_hlt2_output.py
+++ b/python/MooreTests/download_hlt2_output.py
+#!/usr/bin/env python
+###############################################################################
+# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration           #
+#                                                                             #
+# This software is distributed under the terms of the GNU General Public      #
+# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING".   #
+#                                                                             #
+# In applying this licence, CERN does not waive the privileges and immunities #
+# granted to it by virtue of its status as an Intergovernmental Organization  #
+# or submit itself to any jurisdiction.                                       #
+###############################################################################
+
+import socket
+import os
+import atexit
+import tempfile
+import logging
+import datetime
+import shutil
+from Moore.qmtest.context import download_mdf_inputs_locally
+
+# Map of fully-qualified hostname -> cache directory, with a 'default' entry used
+# for any host that is not listed explicitly.
+# The default cache dir is the current working directory as this is most convenient
+# for the machine that the test runs on periodically. It assumes the working directory
+# is not cleaned up often, and so the files remain available for subsequent jobs.
+DEFAULT_CACHE_DIRS = {'default': '.'}
+
+# Parent directory for a temporary cache, used when the chosen cache dir does not exist.
+# Prefer XDG_RUNTIME_DIR (which should be on tmpfs); otherwise use the system temp dir.
+FALLBACK_CACHE_DIR = os.getenv('XDG_RUNTIME_DIR', tempfile.gettempdir())
+
+# Remote MDF file whose download is performed and timed by this script.
+FILE_TO_COPY = "mdf:root://eoslhcb.cern.ch//eos/lhcb/storage/lhcbpr/www/UpgradeRateTest/hlt2_bw_testing__production__full.mdf"
+
+def default_cache_dirs():
+    hostname = socket.getfqdn()
+    dirs = DEFAULT_CACHE_DIRS.get(hostname, DEFAULT_CACHE_DIRS['default'])
+    return dirs
+
+def main():
+
+    logging.basicConfig(
+        format='%(levelname)-7s %(message)s',
+        level=(logging.DEBUG if args.debug else logging.INFO))
+
+    cache_dir = default_cache_dirs()
+    if not os.path.isdir(cache_dir):
+        fallback_dir = tempfile.mkdtemp(
+            prefix='bandwidth-', dir=FALLBACK_CACHE_DIR)
+        logging.warning(
+            'default cache dir {!r} doesnt exist, using {}'.format(
+                cache_dir, fallback_dir))
+        cache_dir = fallback_dir
+        # if we use the fallback directory, clean up after ourselves
+        atexit.register(shutil.rmtree, fallback_dir)
+
+    # Now download file
+    logging.info(
+        f'Downloading input file {FILE_TO_COPY}'
+    )
+    # download_mdf_inputs_locally only downloads if files
+    # are not already available locally on the machine
+    logging.info(
+        f'Downloading inputs for bandwidth job to {cache_dir}')
+    before_copy = datetime.now()
+    kB_to_GB = 1e3
+    downloaded_path = download_mdf_inputs_locally(
+        FILE_TO_COPY,
+        cache_dir,
+        max_size=300 * kB_to_GB * 2e4) # Guesses as to output size and n_events in the FULL stream TODO improve
+    logging.info(f"Downloaded {downloaded_path}. This took: {datetime.now() - before_copy}")
+
+# Run the download only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
\ No newline at end of file