From 16813425aa59bb019ad1e809afbee4bb0e8fb9f3 Mon Sep 17 00:00:00 2001
From: farm <chenjia.zhang@cern.ch>
Date: Thu, 14 Sep 2023 17:03:41 +0800
Subject: [PATCH] Add sites_to_remove option and prefer eoslhcb.cern.ch replicas
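
Add a sites_to_remove argument to get_access_urls_mc() and
get_access_urls_data() so callers can exclude additional failing grid
sites beyond the hard-coded stfc.ac.uk, sort the MC helper's URLs so
eoslhcb.cern.ch replicas are tried first (matching the data helper),
and drop the ad-hoc __main__ demo block.

Minimal usage sketch; the bookkeeping path is taken from the removed
__main__ block, and the site name passed to sites_to_remove is only a
placeholder:

    urls = get_access_urls_data(
        "/LHCb/Collision18/Beam6500GeV-VeloClosed-MagDown/Real\ Data/Reco18/Stripping34/90000000/BHADRON.MDST",
        sites_to_remove=["failing-site.example"],
        max_files=10,
    )
    for url in urls:
        print(url)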

---
 python/PRConfig/FilesFromDirac.py | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/python/PRConfig/FilesFromDirac.py b/python/PRConfig/FilesFromDirac.py
index f303d908..e9c167b1 100644
--- a/python/PRConfig/FilesFromDirac.py
+++ b/python/PRConfig/FilesFromDirac.py
@@ -14,7 +14,7 @@ import subprocess
 from collections import defaultdict
 
 
-def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500):
+def get_access_urls_mc(bkkpath, evttype, filetypes, sites_to_remove=None, max_files=500):
     customEnv = {}
 
     # set custom grid proxy path if exists
@@ -66,17 +66,21 @@ def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500):
 
     # Get the first URL (if more than one) for each LFN, while skipping
     # LFNs for which we couldn't find an URL (e.g. a site was down).
-    lfns = [urls[lfn][0] for lfn in file_list if lfn in urls]
+    lfns_tmp = [urls[lfn][0] for lfn in file_list if lfn in urls]
 
     # Filter out some failing grid sites/files from the list
-    excluded = ['stfc.ac.uk']
-    return [lfn for site in excluded for lfn in lfns if site not in lfn]
+    excluded = ['stfc.ac.uk'] + (sites_to_remove or [])
+    lfns = [lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)]
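+    # Prefer CERN EOS replicas: URLs containing eoslhcb.cern.ch sort first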
+    lfns = sorted(lfns, key=lambda url: "eoslhcb.cern.ch" not in url)
+
+    return lfns
 
     # TODO warn if some of the first N files was not resolved to a URL
     # since then one would get numerically different results.
 
 
-def get_access_urls_data(bkkpath, max_files=500):
+def get_access_urls_data(bkkpath, sites_to_remove=None, max_files=500):
     customEnv = {}
 
     # set custom grid proxy path if exists
@@ -134,11 +138,11 @@ def get_access_urls_data(bkkpath, max_files=500):
 
     # Get the first URL (if more than one) for each LFN, while skipping
     # LFNs for which we couldn't find an URL (e.g. a site was down).
-    lfns = [urls[lfn][0] for lfn in file_list if lfn in urls]
+    lfns_tmp = [urls[lfn][0] for lfn in file_list if lfn in urls]
 
     # Filter out some failing grid sites/files from the list
-    excluded = ['stfc.ac.uk']
-    lfns = [lfn for site in excluded for lfn in lfns if site not in lfn]
+    excluded = ['stfc.ac.uk'] + (sites_to_remove or [])
+    lfns = [lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)]
     lfns = sorted(lfns, key=lambda str: not "eoslhcb.cern.ch" in str)
 
     return lfns
@@ -147,13 +151,3 @@ def get_access_urls_data(bkkpath, max_files=500):
     # since then one would get numerically different results.
 
 
-if __name__ == "__main__":
-    # bkk_path = ("/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8"
-    # "/Sim10-Up08/Digi15-Up04")
-    # evt_type = "30000000"
-    bkk_path_for_data = (
-        "/LHCb/Collision18/Beam6500GeV-VeloClosed-MagDown/Real\ Data/Reco18/Stripping34/90000000/BHADRON.MDST"
-    )
-    # for url in get_access_urls_mc(bkk_path, evt_type, ['XDIGI']):
-    for url in get_access_urls_data(bkk_path_for_data, max_files=10):
-        print(url)
-- 
GitLab