From 5cb026fc15917a5925ea4e75e48621eac5b7ac47 Mon Sep 17 00:00:00 2001
From: Marco Clemencic <marco.clemencic@cern.ch>
Date: Tue, 8 Dec 2020 10:55:19 +0100
Subject: [PATCH] Use XRootD also for listing

---
 python/LbNightlyTools/Scripts/Install.py | 35 ++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/python/LbNightlyTools/Scripts/Install.py b/python/LbNightlyTools/Scripts/Install.py
index 89da1a57..c0182358 100644
--- a/python/LbNightlyTools/Scripts/Install.py
+++ b/python/LbNightlyTools/Scripts/Install.py
@@ -30,8 +30,10 @@ from subprocess import Popen, PIPE, call, STDOUT
 from tempfile import mkstemp
 from datetime import datetime
 from socket import gethostname
-
-from joblib import Parallel, delayed
+try:
+    from urllib.parse import urlparse, urlunparse
+except ImportError:  # Python2
+    from urlparse import urlparse, urlunparse
 
 ARTIFACTS_URL = 'https://lhcb-nightlies-artifacts.web.cern.ch/lhcb-nightlies-artifacts'
 ARTIFACTS_URL_XROOTD = 'root://eosproject.cern.ch//eos/project/l/lhcbwebsites/www/lhcb-nightlies-artifacts'
@@ -58,6 +60,12 @@ def _list_http(url):
     The HTTP server must allow listing of directories with the typical Apache
     format.
     '''
+    if url.startswith(ARTIFACTS_URL):
+        try:
+            return _list_xrootd(
+                url.replace(ARTIFACTS_URL, ARTIFACTS_URL_XROOTD))
+        except Exception:
+            pass
 
     class ListHTMLParser(HTMLParser.HTMLParser):
         '''
@@ -106,12 +114,32 @@ def _list_ssh(url):
     return proc.communicate()[0].decode('utf-8', errors='replace').splitlines()
 
 
+def _list_xrootd(url):
+    '''
+    List a directory using "xrdfs <server> ls <path>".
+    @return the names of the entries, relative to the listed directory
+    @raise RuntimeError if the xrdfs command exits with a non-zero status
+    '''
+    u = urlparse(url)
+    server = urlunparse((u.scheme, u.netloc, "", "", "", ""))
+    proc = Popen(["xrdfs", server, "ls", u.path], stdout=PIPE)
+    out = proc.communicate()[0].decode('utf-8', errors='replace').splitlines()
+    # a failed listing must not be mistaken for an empty directory
+    if proc.returncode:
+        raise RuntimeError("xrdfs ls %s failed (%d)" % (url, proc.returncode))
+    root = u.path if u.path.endswith("/") else (u.path + "/")
+    # "xrdfs ls" prints full paths: strip only the leading directory part
+    return [f[len(root):] if f.startswith(root) else f for f in out]
+
+
 def _url_protocol(url):
     '''
     @return the protocol id of the given URL
     '''
     if re.match(r'https?://', url):
         return 'http'
+    elif url.startswith("root://"):
+        return 'root'
     elif re.match(r'([a-z0-9]+@)?[a-z][a-z0-9.]*:', url):
         return 'ssh'
     else:
@@ -127,6 +155,7 @@ def listdir(url):
     listing = {
         'http': _list_http,
         'ssh': _list_ssh,
+        'root': _list_xrootd,
         'file': os.listdir
     }[protocol](url)
     return sorted(listing)
@@ -504,6 +533,8 @@ class Script(PlainScript):
             no_build_dir=True)
 
     def main(self):
+        from joblib import Parallel, delayed
+
         # split the 'comma-separated list' options
         opts = self.options
         if opts.projects:
-- 
GitLab