From 5cb026fc15917a5925ea4e75e48621eac5b7ac47 Mon Sep 17 00:00:00 2001
From: Marco Clemencic <marco.clemencic@cern.ch>
Date: Tue, 8 Dec 2020 10:55:19 +0100
Subject: [PATCH] Use XRootD also for listing

---
Review notes (text between "---" and the diff is ignored by "git am"):
- _list_xrootd now raises OSError when "xrdfs" exits with a non-zero
  status, so a failed listing is no longer mistaken for an empty directory
  and the HTTP fallback in _list_http is actually used.
- _list_xrootd strips the directory path only as a leading prefix of each
  entry instead of replacing it wherever it occurs.
- The line structure of this patch was restored from a whitespace-collapsed
  copy: indentation and wrapping of context lines are a best-effort
  reconstruction, use "git am --ignore-whitespace" if it does not apply.

 python/LbNightlyTools/Scripts/Install.py | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/python/LbNightlyTools/Scripts/Install.py b/python/LbNightlyTools/Scripts/Install.py
index 89da1a57..c0182358 100644
--- a/python/LbNightlyTools/Scripts/Install.py
+++ b/python/LbNightlyTools/Scripts/Install.py
@@ -30,8 +30,10 @@ from subprocess import Popen, PIPE, call, STDOUT
 from tempfile import mkstemp
 from datetime import datetime
 from socket import gethostname
-
-from joblib import Parallel, delayed
+try:
+    from urllib.parse import urlparse, urlunparse
+except ImportError:  # Python2
+    from urlparse import urlparse, urlunparse
 
 ARTIFACTS_URL = 'https://lhcb-nightlies-artifacts.web.cern.ch/lhcb-nightlies-artifacts'
 ARTIFACTS_URL_XROOTD = 'root://eosproject.cern.ch//eos/project/l/lhcbwebsites/www/lhcb-nightlies-artifacts'
@@ -58,6 +60,12 @@ def _list_http(url):
     The HTTP server must allow listing of directories with the typical Apache
     format.
     '''
+    if url.startswith(ARTIFACTS_URL):
+        try:
+            return _list_xrootd(
+                url.replace(ARTIFACTS_URL, ARTIFACTS_URL_XROOTD))
+        except Exception:
+            pass
 
     class ListHTMLParser(HTMLParser.HTMLParser):
         '''
@@ -106,12 +114,40 @@ def _list_ssh(url):
     return proc.communicate()[0].decode('utf-8', errors='replace').splitlines()
 
 
+def _list_xrootd(url):
+    '''
+    List a directory using "xrdfs .. ls ..."
+
+    @param url: URL of the directory, in the form root://<host>/<path>
+    @return the entries of the directory, without the leading directory path
+    @raise OSError: if the "xrdfs" command exits with a non-zero status
+    '''
+    u = urlparse(url)
+    server = urlunparse((u.scheme, u.netloc, "", "", "", ""))
+    proc = Popen(["xrdfs", server, "ls", u.path], stdout=PIPE)
+    output = proc.communicate()[0].decode('utf-8', errors='replace')
+    if proc.returncode != 0:
+        # without this check a failed listing looks like an empty directory
+        # and callers (e.g. the fallback in _list_http) cannot react to it
+        raise OSError('"xrdfs {0} ls {1}" failed with exit code {2}'.format(
+            server, u.path, proc.returncode))
+    root = u.path if u.path.endswith("/") else (u.path + "/")
+    # "xrdfs ls" always returns full path, so we have to strip that part
+    # (only when it is the leading part of the entry)
+    return [
+        f[len(root):] if f.startswith(root) else f
+        for f in output.splitlines()
+    ]
+
+
 def _url_protocol(url):
     '''
     @return the protocol id of the given URL
     '''
     if re.match(r'https?://', url):
         return 'http'
+    elif url.startswith("root://"):
+        return 'root'
     elif re.match(r'([a-z0-9]+@)?[a-z][a-z0-9.]*:', url):
         return 'ssh'
     else:
@@ -127,6 +163,7 @@ def listdir(url):
     listing = {
         'http': _list_http,
         'ssh': _list_ssh,
+        'root': _list_xrootd,
         'file': os.listdir
     }[protocol](url)
     return sorted(listing)
@@ -504,6 +541,8 @@ class Script(PlainScript):
                           no_build_dir=True)
 
     def main(self):
+        from joblib import Parallel, delayed
+
         # split the 'comma-separated list' options
         opts = self.options
         if opts.projects:
-- 
GitLab