From 16813425aa59bb019ad1e809afbee4bb0e8fb9f3 Mon Sep 17 00:00:00 2001 From: farm <chenjia.zhang@cern.ch> Date: Thu, 14 Sep 2023 17:03:41 +0800 Subject: [PATCH] add more function --- python/PRConfig/FilesFromDirac.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/python/PRConfig/FilesFromDirac.py b/python/PRConfig/FilesFromDirac.py index f303d908..e9c167b1 100644 --- a/python/PRConfig/FilesFromDirac.py +++ b/python/PRConfig/FilesFromDirac.py @@ -14,7 +14,7 @@ import subprocess from collections import defaultdict -def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500): +def get_access_urls_mc(bkkpath, evttype, filetypes, sites_to_remove=[], max_files=500): customEnv = {} # set custom grid proxy path if exists @@ -66,17 +66,20 @@ def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500): # Get the first URL (if more than one) for each LFN, while skipping # LFNs for which we couldn't find an URL (e.g. a site was down). - lfns = [urls[lfn][0] for lfn in file_list if lfn in urls] + lfns_tmp = [urls[lfn][0] for lfn in file_list if lfn in urls] # Filter out some failing grid sites/files from the list - excluded = ['stfc.ac.uk'] - return [lfn for site in excluded for lfn in lfns if site not in lfn] + excluded = ['stfc.ac.uk'] + sites_to_remove + lfns = [lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)] + lfns = sorted(lfns, key=lambda str: not "eoslhcb.cern.ch" in str) + + return lfns # TODO warn if some of the first N files was not resolved to a URL # since then one would get numerically different results. -def get_access_urls_data(bkkpath, max_files=500): +def get_access_urls_data(bkkpath, sites_to_remove=[], max_files=500): customEnv = {} # set custom grid proxy path if exists @@ -134,11 +137,11 @@ def get_access_urls_data(bkkpath, max_files=500): # Get the first URL (if more than one) for each LFN, while skipping # LFNs for which we couldn't find an URL (e.g. a site was down). - lfns = [urls[lfn][0] for lfn in file_list if lfn in urls] + lfns_tmp = [urls[lfn][0] for lfn in file_list if lfn in urls] # Filter out some failing grid sites/files from the list - excluded = ['stfc.ac.uk'] - lfns = [lfn for site in excluded for lfn in lfns if site not in lfn] + excluded = ['stfc.ac.uk'] + sites_to_remove + lfns = [lfn for lfn in lfns_tmp if not any(site in lfn for site in excluded)] lfns = sorted(lfns, key=lambda str: not "eoslhcb.cern.ch" in str) return lfns @@ -147,13 +150,4 @@ def get_access_urls_data(bkkpath, max_files=500): # since then one would get numerically different results. -if __name__ == "__main__": - # bkk_path = ("/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8" - # "/Sim10-Up08/Digi15-Up04") - # evt_type = "30000000" - bkk_path_for_data = ( - "/LHCb/Collision18/Beam6500GeV-VeloClosed-MagDown/Real\ Data/Reco18/Stripping34/90000000/BHADRON.MDST" - ) - # for url in get_access_urls_mc(bkk_path, evt_type, ['XDIGI']): - for url in get_access_urls_data(bkk_path_for_data, max_files=10): - print(url) + -- GitLab