diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee.py b/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee.py index fca2191c665f274fe77171bbc60721649fa34e3c..819e51fc0f6259b29066535df4ac23e65f24c6bc 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "11124001", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee_LowLumi.py index ef398c250f58ccfabe3c0e8012ea0d6eb0a591b0..e1ca6cbab61ccd248a6771cdf7c218766a7e7815 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_B2Kstee_LowLumi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "11124001", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma.py b/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma.py index 706f88d2ef5c239b7f3c4051b32c842720e340b2..46746b6a019f258706b2a5cce9f04d09ed67a961 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma.py @@ -10,9 +10,9 @@ 
############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "11102202", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma_LowLumi.py index 4124c2b25386c59b5768fe440933907365b8625a..781a73763818e2f3bcadfbc166a2b0f010154e79 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Bd2Kstgamma_LowLumi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "11102202", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi.py index fc24b4ffb897b44bb0f9d796b495c147363419c0..65e0ab300a9d34d27f2449366d4757c94371e41b 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "13144011", ["XDIGI"]) 
options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi_LowLumi.py index b892cdff0333a8a50e7e01cf55d6126796169a9c..181ceaa06f770707d5fbaa3edbc73ded5c234dbb 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2JPsiPhi_LowLumi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "13144011", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi.py index 083715d4a462876356530c3704c2122b0a37c0b4..d0239389951664c572be95b643c26add5114a959 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "13104012", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi_LowLumi.py index 655ac12a4693e46b0fcd2518afe475be1bce7d07..1cf5532c3490083aa94b4227d3a0c13f02943263 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Bs2PhiPhi_LowLumi.py @@ -10,9 
+10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "13104012", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi.py index 45d2073f70c7d99dd7ec6aae517e75356733f907..2eb4a844a7940b737fbe188908984205a63c6d6c 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "27265100", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi_LowLumi.py index 565f34b936faac0979ed19231309faec90367230..d385af8625be3f477501beb3ad04d60567f3a2c9 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Dst2D0pi_LowLumi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "27265100", ["XDIGI"]) options.input_type =
"ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_MinBias.py b/options/Moore/DataChallenges/DC_Sim10aU1_MinBias.py index 6da067cc7596c2756ef91a6e82a1a52f085de1f6..c89595283e8510b211298559a00cf656a85369f7 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_MinBias.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_MinBias.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "30000000", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_MinBias_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_MinBias_LowLumi.py index 81c820f8cdb6fe7affa590505df192f64ff8befc..675dea6a9c955c71ea7749ab8f86ca5bf52f7d91 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_MinBias_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_MinBias_LowLumi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "30000000", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu.py b/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu.py index e462aeee6092154d6a73504fae1666ce28440239..0586265b2c39886e3e1d3310d02afbc8b407fcfc 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu.py @@ -10,9 +10,9 @@ 
############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8/Sim10aU1", "42112000", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu_LowLumi.py b/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu_LowLumi.py index 9fcd7849688805696328c085c8b5319848d9fab8..3225a2b26803b22f17540e75bca2fa75466053ed 100644 --- a/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu_LowLumi.py +++ b/options/Moore/DataChallenges/DC_Sim10aU1_Z2mumu_LowLumi.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc( "/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu3.8-25ns-Pythia8/Sim10aU1", "42112000", ["XDIGI"]) options.input_type = "ROOT" diff --git a/options/Moore/DataChallenges/DC_Sim10b_Digi16_MinBias_HLT2_pp_commissioning.py b/options/Moore/DataChallenges/DC_Sim10b_Digi16_MinBias_HLT2_pp_commissioning.py index 9ed862c31d9b2616ca3b5730eea89082b900cc9b..6cf264e83ccb0cbee89ea000f5f1107456f50206 100644 --- a/options/Moore/DataChallenges/DC_Sim10b_Digi16_MinBias_HLT2_pp_commissioning.py +++ b/options/Moore/DataChallenges/DC_Sim10b_Digi16_MinBias_HLT2_pp_commissioning.py @@ -10,9 +10,9 @@ ############################################################################### from Moore import options -from PRConfig.FilesFromDirac import get_access_urls +from PRConfig.FilesFromDirac import get_access_urls_mc -options.input_files = get_access_urls( +options.input_files = get_access_urls_mc(
"/MC/2022/Beam6800GeV-2022-MagDown-NoUT-Nu2.1-25ns-Pythia8/Sim10b/Digi16/hlt1_pp_no_gec_no_ut/HLT2-pp-commissioning", "30000000", ["DST"])
diff --git a/python/PRConfig/FilesFromDirac.py b/python/PRConfig/FilesFromDirac.py
index 0b9e827875c0420a87977f55a1faafcca2a4dbee..f303d90899ca992478fefbc6ff9a4a9718d7c14c 100644
--- a/python/PRConfig/FilesFromDirac.py
+++ b/python/PRConfig/FilesFromDirac.py
@@ -14,7 +14,7 @@ import subprocess
 from collections import defaultdict
 
 
-def get_access_urls(bkkpath, evttype, filetypes, max_files=500):
+def get_access_urls_mc(bkkpath, evttype, filetypes, max_files=500):
     customEnv = {}
 
     # set custom grid proxy path if exists
@@ -76,9 +76,106 @@ def get_access_urls(bkkpath, evttype, filetypes, max_files=500):
     # since then one would get numerically different results.
 
 
+def get_access_urls_data(bkkpath, max_files=500):
+    """Return access URLs for the files found under a bookkeeping path.
+
+    The path is listed with ``dirac-dms-list-directory -B``; the first
+    ``max_files`` LFNs (in sorted order) are then resolved with
+    ``dirac-dms-lfn-accessURL``.
+
+    Args:
+        bkkpath: bookkeeping path. It is interpolated into a shell command,
+            so the caller must already have escaped spaces for the shell.
+        max_files: maximum number of LFNs passed to the URL lookup.
+
+    Returns:
+        One URL per resolved LFN, minus those on excluded sites, with the
+        URLs containing "eoslhcb.cern.ch" first.
+
+    Raises:
+        NotImplementedError: if a listed entry ends with "raw".
+        subprocess.CalledProcessError: if listing the directory fails.
+    """
+    customEnv = {}
+
+    # set custom grid proxy path if exists
+    try:
+        customEnv["X509_USER_PROXY"] = os.environ["X509_USER_PROXY"]
+        print("Found X509_USER_PROXY set to {}".format(
+            customEnv["X509_USER_PROXY"]))
+    except KeyError:
+        print("No X509_USER_PROXY found, continuing with defaults...")
+
+    print("getting list of files from Dirac")
+    stdout = subprocess.check_output(
+        ". /cvmfs/lhcb.cern.ch/lib/LbEnv --quiet;"
+        "lb-dirac dirac-dms-list-directory -B {}".format(bkkpath),
+        shell=True,
+        env=customEnv,
+        text=True,
+    )
+
+    # remove general directory name and number of files from list
+    # e.g. "/lhcb/MC/Upgrade/XDIGI/00128250/0000/: 14 files, 0 sub-directories"
+    file_list = [x for x in stdout.splitlines() if ":" not in x]
+
+    # ensure files are always in same order
+    file_list.sort()
+    if any(file.endswith("raw") for file in file_list):
+        raise NotImplementedError(
+            "File ending with 'raw' found, please write a script manually")
+
+    print(
+        "#### Checking output of . /cvmfs/lhcb.cern.ch/lib/LbEnv --quiet; lb-dirac dirac-dms-lfn-accessURL --Terminal"
+    )
+    try:
+        stdout = subprocess.check_output(
+            ". /cvmfs/lhcb.cern.ch/lib/LbEnv --quiet;"
+            "lb-dirac dirac-dms-lfn-accessURL --Terminal",
+            shell=True,
+            env=customEnv,
+            text=True,
+            input="\n".join(file_list[:max_files]))
+    except subprocess.CalledProcessError as e:
+        stdout = e.output
+
+    print("#### Storing URLs")
+    urls = defaultdict(list)
+    successful = False
+    for line in stdout.splitlines():
+        if not line:
+            # an empty line has no line[0]; it carries no status or URL
+            continue
+        if line[0] != ' ':
+            successful = line.startswith('Successful')
+        else:
+            if successful:
+                m = re.match(r' +([^: ]*) *: *(.*)', line)
+                if m:
+                    urls[m.group(1)].append(m.group(2))
+
+    # Get the first URL (if more than one) for each LFN, while skipping
+    # LFNs for which we couldn't find an URL (e.g. a site was down).
+    lfns = [urls[lfn][0] for lfn in file_list if lfn in urls]
+
+    # Filter out some failing grid sites/files from the list
+    excluded = ['stfc.ac.uk']
+    lfns = [lfn for lfn in lfns if not any(site in lfn for site in excluded)]
+    lfns = sorted(lfns, key=lambda url: "eoslhcb.cern.ch" not in url)
+
+    return lfns
+
+    # TODO warn if some of the first N files was not resolved to a URL
+    # since then one would get numerically different results.
+
+
 if __name__ == "__main__":
-    bkk_path = ("/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8"
-                "/Sim10-Up08/Digi15-Up04")
-    evt_type = "30000000"
-    for url in get_access_urls(bkk_path, evt_type, ['XDIGI']):
+    # bkk_path = ("/MC/Upgrade/Beam7000GeV-Upgrade-MagDown-Nu7.6-25ns-Pythia8"
+    #             "/Sim10-Up08/Digi15-Up04")
+    # evt_type = "30000000"
+    bkk_path_for_data = (
+        r"/LHCb/Collision18/Beam6500GeV-VeloClosed-MagDown/Real\ Data/Reco18/Stripping34/90000000/BHADRON.MDST"
+    )
+    # for url in get_access_urls_mc(bkk_path, evt_type, ['XDIGI']):
+    for url in get_access_urls_data(bkk_path_for_data, max_files=10):
         print(url)