diff --git a/Tools/ART/python/ART/__init__.py b/Tools/ART/python/ART/__init__.py
index 423193cc52bd18a9327792eafce9142b186b7bfc..83f3adc32ad22eff9c7d9f08203397dea1528c7d 100644
--- a/Tools/ART/python/ART/__init__.py
+++ b/Tools/ART/python/ART/__init__.py
@@ -1,5 +1,11 @@
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-"""TBD."""
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
+"""
+Import default Classes.
+
+Allows one to do:
+
+from ART import ArtBase
+"""

 from art_base import ArtBase
 from art_build import ArtBuild
diff --git a/Tools/ART/python/ART/art_base.py b/Tools/ART/python/ART/art_base.py
index 5812f8759aa2473951f8d6828231afe8d4f810fe..1175dae2956dfc45f06f30522b18aa869938ce98 100755
--- a/Tools/ART/python/ART/art_base.py
+++ b/Tools/ART/python/ART/art_base.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-"""TBD."""
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
+"""Base class for grid and (local) build submits."""

 __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>"

@@ -10,56 +10,57 @@ import json
 import logging
 import os
 import re
-import yaml

 try:
     import scandir as scan
 except ImportError:
     import os as scan

-from art_misc import is_exe, run_command
+from art_configuration import ArtConfiguration
+# from art_diff import ArtDiff
 from art_header import ArtHeader
+from art_misc import is_exe, run_command

 MODULE = "art.base"


 class ArtBase(object):
-    """TBD."""
+    """Base class for grid and (local) build submits."""

     def __init__(self, art_directory):
-        """TBD."""
+        """Keep arguments."""
         self.art_directory = art_directory

     def task_list(self, job_type, sequence_tag):
-        """TBD."""
+        """Default implementation."""
         self.not_implemented()

     def task(self, package, job_type, sequence_tag):
-        """TBD."""
+        """Default implementation."""
         self.not_implemented()

     def job(self, package, job_type, sequence_tag, index, out):
-        """TBD."""
+        """Default implementation."""
         self.not_implemented()

     def compare(self, package, test_name, days, file_names):
-        """TBD."""
+        """Default implementation."""
         self.not_implemented()

     def list(self, package, job_type, json_format=False):
-        """TBD."""
+        """Default implementation."""
         self.not_implemented()

     def log(self, package, test_name):
-        """TBD."""
+        """Default implementation."""
        self.not_implemented()

     def output(self, package, test_name, file_name):
-        """TBD."""
+        """Default implementation."""
         self.not_implemented()

     def validate(self, script_directory):
-        """TBD."""
+        """Validate all tests in given script_directory."""
         log = logging.getLogger(MODULE)

         directories = self.get_test_directories(script_directory.rstrip("/"))
@@ -82,7 +83,7 @@ class ArtBase(object):
         return 0

     def included(self, script_directory, job_type, index_type, nightly_release, project, platform):
-        """TBD."""
+        """Print all included tests for these arguments."""
         log = logging.getLogger(MODULE)
         directories = self.get_test_directories(script_directory.rstrip("/"))
         for directory in directories.itervalues():
@@ -93,51 +94,45 @@
             log.info("%s %s", test_name, ArtHeader(test_name).get(ArtHeader.ART_INCLUDE))
         return 0

-    def download(self, input_file):
-        """TBD."""
-        return self.get_input(input_file)
-
-    def diff_pool(self, file_name, ref_file):
-        """TBD."""
-        import PyUtils.PoolFile as PF
-
-        # diff-pool
-        df = PF.DiffFiles(refFileName=ref_file, chkFileName=file_name, ignoreList=['RecoTimingObj_p1_RAWtoESD_timings', 'RecoTimingObj_p1_ESDtoAOD_timings'])
-        df.printSummary()
-        stat = df.status()
-        print stat
-        del df
-
-        return stat
-
-    def diff_root(self, file_name, ref_file, entries=-1):
-        """TBD."""
+    def config(self, package, nightly_release, project, platform, config):
+        """Show configuration."""
         log = logging.getLogger(MODULE)
+        config = ArtConfiguration(config)
+        if package is None:
+            log.info("%s", config.packages())
+            return 0

-        # diff-root
-        (code, out, err) = run_command("acmd.py diff-root " + file_name + " " + ref_file + " --error-mode resilient --ignore-leaves RecoTimingObj_p1_HITStoRDO_timings RecoTimingObj_p1_RAWtoESD_mems RecoTimingObj_p1_RAWtoESD_timings RAWtoESD_mems RAWtoESD_timings ESDtoAOD_mems ESDtoAOD_timings HITStoRDO_timings RAWtoALL_mems RAWtoALL_timings RecoTimingObj_p1_RAWtoALL_mems RecoTimingObj_p1_RAWtoALL_timings RecoTimingObj_p1_EVNTtoHITS_timings --entries " + str(entries))
-        if code != 0:
-            log.error("Error: %d", code)
-            print(err)
-
-        log.info(out)
-        return code
+        keys = config.keys(nightly_release, project, platform, package)
+        for key in keys:
+            log.info("%s %s", key, config.get(nightly_release, project, platform, package, key))
+        return 0

     #
     # Default implementations
     #
-    def compare_ref(self, file_name, ref_file, entries=-1):
+    def compare_ref(self, path, ref_path, entries=-1):
         """TBD."""
         result = 0
-        result |= self.diff_pool(file_name, ref_file)
-        result |= self.diff_root(file_name, ref_file, entries)
+        (exit_code, out, err, command, start_time, end_time) = run_command(' '.join(("art-diff.py", "--diff-type=diff-pool", path, ref_path)))
+        if exit_code != 0:
+            result |= exit_code
+            print err
+            print out
+
+        (exit_code, out, err, command, start_time, end_time) = run_command(' '.join(("art-diff.py", "--diff-type=diff-root", "--entries=" + str(entries), path, ref_path)))
+        if exit_code != 0:
+            result |= exit_code
+            print err
+            print out
+
         return result

     #
     # Protected Methods
     #
-    def get_art_results(self, output):
+    @staticmethod
+    def get_art_results(output):
         """
         Extract art-results.

@@ -161,23 +156,13 @@ class ArtBase(object):

         return result

-    def get_config(self):
-        """Retrieve dictionary of ART configuration file, or None if file does not exist."""
-        try:
-            config_file = open("art-configuration.yml", "r")
-            config = yaml.load(config_file)
-            config_file.close()
-            return config
-        except IOError:
-            return None
-
     def get_files(self, directory, job_type=None, index_type="all", nightly_release=None, project=None, platform=None):
         """
         Return a list of all test files matching 'test_*.sh' of given 'job_type', 'index_type' and nightly/project/platform.

-        'index_type' can be 'all', 'batch' or 'single'.
+        'job_type' can be 'grid' or 'build', given by the test

-        If "given" is None, all files are returned.
+        'index_type' can be 'all', 'batch' or 'single'.

         Only the filenames are returned.
         """
@@ -224,6 +209,8 @@ class ArtBase(object):
         """
         result = {}
         for root, dirs, files in scan.walk(directory):
+            # exclude some directories
+            dirs[:] = [d for d in dirs if not d.endswith('_test.dir')]
             if root.endswith('/test'):
                 package = os.path.basename(os.path.dirname(root))
                 result[package] = root
@@ -242,7 +229,7 @@ class ArtBase(object):
         for pattern in patterns:
             nightly_release_pattern = "*"
             project_pattern = "*"
-            platform_pattern = "*-*-*-opt"
+            platform_pattern = "*-*-*-*"

             count = pattern.count('/')
             if count >= 2:
@@ -256,23 +243,9 @@ class ArtBase(object):
                 return True
         return False

-    def get_input(self, input_name):
-        """Download input file from rucio. Retuns path of inputfile."""
-        work_dir = '.'
-
-        # run in correct environment
-        env = os.environ.copy()
-        env['PATH'] = '.:' + env['PATH']
-
-        (code, out, err) = run_command(os.path.join(self.art_directory, "art-get-input.sh") + " " + input_name, dir=work_dir, env=env)
-        if code == 0 and out != '':
-            return os.path.join(work_dir, input_name.replace(':', '/', 1))
-
-        return None
-
     #
     # Private Methods
     #
     def not_implemented(self):
-        """TBD."""
+        """Default Not Implemented Method."""
         raise NotImplementedError("Class %s doesn't implement method: %s(...)" % (self.__class__.__name__, inspect.stack()[1][3]))
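`get_art_results()` becomes a `@staticmethod` so that `ArtBuild` can call it as `ArtBase.get_art_results(output)` (see art_build.py below). Its body is elided by this hunk; the following is a minimal sketch of the extraction it performs, assuming the `art-result: <code> <name>` convention that ART test scripts print to stdout:

```python
import re

def get_art_results_sketch(output):
    """Collect 'art-result:' entries from a test's captured stdout (sketch)."""
    result = []
    for line in output.splitlines():
        match = re.match(r"art-result:\s*(\d+)\s*(.*)", line)
        if match:
            # each entry records the reported exit code and the test step name
            result.append({'result': int(match.group(1)), 'name': match.group(2)})
    return result
```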
diff --git a/Tools/ART/python/ART/art_build.py b/Tools/ART/python/ART/art_build.py
index ddd0a73f243e95c27a59e470b61f4dcef407828a..67f9aecdcb9c80408fc4b9438f0602a74c6b9faa 100644
--- a/Tools/ART/python/ART/art_build.py
+++ b/Tools/ART/python/ART/art_build.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-"""TBD."""
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
+"""Class for (local) build submits."""

 __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>"

 import collections
+import concurrent.futures
 import fnmatch
 import json
 import logging
@@ -15,27 +16,30 @@ from art_misc import run_command, mkdir_p
 from art_base import ArtBase
 from art_header import ArtHeader

-from parallelScheduler import ParallelScheduler
-
 MODULE = "art.build"


-def run_job(art_directory, sequence_tag, script_directory, package, job_type, index, test_name, nightly_release, project, platform, nightly_tag):
-    """TBD."""
-    log = logging.getLogger(MODULE)
+def run_job(art_directory, sequence_tag, script_directory, package, job_type, job_index, test_name):
+    """
+    Job to be run by parallel or serial scheduler.

-    log.info("job started %s %s %s %s %s %d %s %s %s %s %s", art_directory, sequence_tag, script_directory, package, job_type, index, test_name, nightly_release, project, platform, nightly_tag)
-    (exit_code, out, err) = run_command(' '.join((os.path.join(art_directory, './art-internal.py'), "job", "build", script_directory, package, job_type, sequence_tag, str(index), "out", nightly_release, project, platform, nightly_tag)))
-    log.info("job ended %s %s %s %s %s %d %s %s %s %s %s", art_directory, sequence_tag, script_directory, package, job_type, index, test_name, nightly_release, project, platform, nightly_tag)
+    Needs to be defined outside a class.
+    Names of arguments are important, see call to scheduler.
+    """
+    # <script_directory> <sequence_tag> <package> <outfile> <job_type> <job_index>
+    log = logging.getLogger(MODULE)
+    log.info("job started %s %s %s %s %s %d %s", art_directory, sequence_tag, script_directory, package, job_type, job_index, test_name)
+    (exit_code, out, err, command, start_time, end_time) = run_command(' '.join((os.path.join(art_directory, './art-internal.py'), "build", "job", script_directory, sequence_tag, package, "out", job_type, str(job_index))))
+    log.info("job ended %s %s %s %s %s %d %s", art_directory, sequence_tag, script_directory, package, job_type, job_index, test_name)

-    return (test_name, exit_code, out, err)
+    return (package, test_name, exit_code, out, err, start_time, end_time)

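`run_job()` is deliberately a module-level function: `ArtBuild.task()` submits it to a shared `concurrent.futures` executor and `task_list()` collects the result tuples with `as_completed()`. A minimal sketch of that scheduling pattern, where `run_fake_job` is a stand-in and not part of ART:

```python
import concurrent.futures

def run_fake_job(package, job_index):
    # stand-in for run_job(); returns (package, job_index, exit_code)
    return (package, job_index, 0)

executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
future_set = [executor.submit(run_fake_job, 'MyPackage', i) for i in range(8)]
for future in concurrent.futures.as_completed(future_set):
    (package, job_index, exit_code) = future.result()
    print package, job_index, exit_code  # order follows completion, not submission
```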
 class ArtBuild(ArtBase):
-    """TBD."""
+    """Class for (local) build submits."""

     def __init__(self, art_directory, nightly_release, project, platform, nightly_tag, script_directory, max_jobs=0, ci=False):
-        """TBD."""
+        """Keep arguments."""
         super(ArtBuild, self).__init__(art_directory)
         log = logging.getLogger(MODULE)
         log.debug("ArtBuild %s %s %d", art_directory, script_directory, max_jobs)
@@ -49,33 +53,63 @@ class ArtBuild(ArtBase):
         self.ci = ci

     def task_list(self, job_type, sequence_tag):
-        """TBD."""
+        """Run a list of packages for given job_type with sequence_tag."""
         log = logging.getLogger(MODULE)
         log.debug("task_list %s %s", job_type, sequence_tag)
         test_directories = self.get_test_directories(self.script_directory)
         if not test_directories:
             log.warning('No tests found in directories ending in "test"')

-        status = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict()))
+        log.info("Executor started with %d threads", self.max_jobs)
+        executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_jobs)
+        future_set = []

         for package, directory in test_directories.items():
+            future_set.extend(self.task(executor, package, job_type, sequence_tag))
+
+        # Create status of all packages
+        status = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict()))
+
+        # Some release information
+        status['release_info']['nightly_release'] = self.nightly_release
+        status['release_info']['nightly_tag'] = self.nightly_tag
+        status['release_info']['project'] = self.project
+        status['release_info']['platform'] = self.platform
+
+        # Package information with all tests in each package
+        for future in concurrent.futures.as_completed(future_set):
+            (package, test_name, exit_code, out, err, start_time, end_time) = future.result()
+            log.debug("Handling job for %s %s", package, test_name)
+            status[package][test_name]['exit_code'] = exit_code
+            # Removed, seem to give empty lines
+            # status[package][test_name]['out'] = out
+            # status[package][test_name]['err'] = err
+            status[package][test_name]['start_time'] = start_time.strftime('%Y-%m-%dT%H:%M:%S')
+            status[package][test_name]['end_time'] = end_time.strftime('%Y-%m-%dT%H:%M:%S')
+            status[package][test_name]['start_epoch'] = start_time.strftime('%s')
+            status[package][test_name]['end_epoch'] = end_time.strftime('%s')

             test_directory = os.path.abspath(test_directories[package])
-            job_results = self.task(package, job_type, sequence_tag)
-            for job_result in job_results:
-                test_name = job_result[0]
-                status[package][test_name]['exit_code'] = job_result[1]
-                status[package][test_name]['out'] = job_result[2]
-                status[package][test_name]['err'] = job_result[3]
-                status[package][test_name]['test_directory'] = test_directory
-
-                # gather results
-                result = []
-                log.debug("Looking for results for test %s", test_name)
-                with open(os.path.join(sequence_tag, package, os.path.splitext(test_name)[0], 'stdout.txt'), 'r') as f:
+            fname = os.path.join(test_directory, test_name)
+            if os.path.exists(fname):
+                status[package][test_name]['description'] = ArtHeader(fname).get(ArtHeader.ART_DESCRIPTION)
+            else:
+                log.warning("Test file cannot be opened to get description: %s", fname)
+                status[package][test_name]['description'] = ""
+            status[package][test_name]['test_directory'] = test_directory
+
+            # gather results
+            result = []
+            stdout_path = os.path.join(sequence_tag, package, os.path.splitext(test_name)[0], 'stdout.txt')
+            log.debug("Looking for results in %s", stdout_path)
+            if os.path.exists(stdout_path):
+                with open(stdout_path, 'r') as f:
                     output = f.read()
-                    result = self.get_art_results(output)
+                    result = ArtBase.get_art_results(output)
+            else:
+                log.warning("Output file does not exist: %s", stdout_path)

-                status[package][job_result[0]]['result'] = result
+            status[package][test_name]['result'] = result

         mkdir_p(sequence_tag)
         with open(os.path.join(sequence_tag, "status.json"), 'w') as outfile:
@@ -83,16 +117,18 @@ class ArtBuild(ArtBase):

         return 0

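For reference, the status.json written above would have roughly the following shape; all concrete values here are illustrative, only the key names come from the assignments in `task_list()`:

```python
{
    "release_info": {
        "nightly_release": "21.0",
        "nightly_tag": "2018-02-16T2215",
        "project": "Athena",
        "platform": "x86_64-slc6-gcc62-opt"
    },
    "MyTestPackage": {
        "test_example.sh": {
            "exit_code": 0,
            "description": "example test",
            "test_directory": "/path/to/MyTestPackage/test",
            "start_time": "2018-02-17T03:15:00",
            "end_time": "2018-02-17T03:25:00",
            "start_epoch": "1518833700",
            "end_epoch": "1518834300",
            "result": [{"result": 0, "name": "compare"}]
        }
    }
}
```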
-    def task(self, package, job_type, sequence_tag):
-        """TBD."""
+    def task(self, executor, package, job_type, sequence_tag):
+        """Run tests of a single package."""
         log = logging.getLogger(MODULE)
         log.debug("task %s %s %s", package, job_type, sequence_tag)
         test_directories = self.get_test_directories(self.script_directory)
         test_directory = os.path.abspath(test_directories[package])
         test_names = self.get_files(test_directory, job_type, "all", self.nightly_release, self.project, self.platform)
-        scheduler = ParallelScheduler(self.max_jobs + 1)
+        if not test_names:
+            log.debug("No tests found for package %s and job_type %s", package, job_type)

-        index = 0
+        future_set = []
+        job_index = 0
         for test_name in test_names:
             schedule_test = False
             fname = os.path.join(test_directory, test_name)
@@ -111,28 +147,38 @@ class ArtBuild(ArtBase):
                 log.warning("job skipped, file not executable: %s", fname)

             if schedule_test:
-                scheduler.add_task(task_name="t" + str(index), dependencies=[], description="d", target_function=run_job, function_kwargs={'art_directory': self.art_directory, 'sequence_tag': sequence_tag, 'script_directory': self.script_directory, 'package': package, 'job_type': job_type, 'index': index, 'test_name': test_name, 'nightly_release': self.nightly_release, 'project': self.project, 'platform': self.platform, 'nightly_tag': self.nightly_tag})
-            index += 1
+                future_set.append(executor.submit(run_job, self.art_directory, sequence_tag, self.script_directory, package, job_type, job_index, test_name))
+            job_index += 1

-        result = scheduler.run()
-        return result
+        return future_set

-    def job(self, package, job_type, sequence_tag, index, out):
-        """TBD."""
+    def job(self, sequence_tag, package, out, job_type, job_index):
+        """Run a single test."""
         log = logging.getLogger(MODULE)
-        log.debug("job %s %s %s %d %s", package, job_type, sequence_tag, index, out)
+        log.debug("ArtBuild job %s %s %s %d %s", package, job_type, sequence_tag, job_index, out)
         test_directories = self.get_test_directories(self.script_directory)
         test_directory = os.path.abspath(test_directories[package])
-        test_name = self.get_files(test_directory, job_type, "all", self.nightly_release, self.project, self.platform)[int(index)]
+        test_name = self.get_files(test_directory, job_type, "all", self.nightly_release, self.project, self.platform)[int(job_index)]

         work_directory = os.path.join(sequence_tag, package, os.path.splitext(test_name)[0])
         mkdir_p(work_directory)
-
-        (exit_code, output, err) = run_command(' '.join((os.path.join(test_directory, test_name), '.', package, job_type, test_name, self.nightly_release, self.project, self.platform, self.nightly_tag)), dir=work_directory)
+        log.debug("Work dir %s", work_directory)
+
+        # Tests are called with arguments: PACKAGE TEST_NAME SCRIPT_DIRECTORY TYPE
+        script_directory = '.'
+        env = os.environ.copy()
+        env['ArtScriptDirectory'] = script_directory
+        env['ArtPackage'] = package
+        env['ArtJobType'] = job_type
+        env['ArtJobName'] = test_name
+        cmd = ' '.join((os.path.join(test_directory, test_name), package, test_name, script_directory, job_type))
+        (exit_code, output, err, command, start_time, end_time) = run_command(cmd, dir=work_directory, env=env)

         with open(os.path.join(work_directory, "stdout.txt"), "w") as text_file:
+            log.debug("Copying stdout into %s", work_directory)
             text_file.write(output)
         with open(os.path.join(work_directory, "stderr.txt"), "w") as text_file:
+            log.debug("Copying stderr into %s", work_directory)
             text_file.write(err)

         return exit_code
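Throughout this change `run_command()` is unpacked as a 6-tuple. The helper itself lives in art_misc.py, which is not part of this diff; a rough sketch of the contract assumed by the callers above:

```python
import shlex
import subprocess
from datetime import datetime

def run_command_sketch(cmd, dir=None, env=None):
    """Run cmd, returning (exit_code, out, err, command, start_time, end_time)."""
    start_time = datetime.now()
    process = subprocess.Popen(shlex.split(cmd), cwd=dir, env=env,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (out, err) = process.communicate()
    end_time = datetime.now()
    return (process.returncode, out, err, cmd, start_time, end_time)
```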
+
+        Format is: /21.0/Athena/x86_64-slc6-gcc62-opt
+        """
+        return ArtConfiguration.SEPARATOR + ArtConfiguration.SEPARATOR.join((nightly_release, project, platform))
+
+    def release_key_compare(self, x, y):
+        """Compare two release keys."""
+        xa = x.split(ArtConfiguration.SEPARATOR)
+        ya = y.split(ArtConfiguration.SEPARATOR)
+
+        for index, item in sorted(enumerate(xa), None, None, True):
+            if xa[index] < ya[index]:
+                return -1
+            elif xa[index] > ya[index]:
+                return +1
+        return 0
+
+    def keys(self, nightly_release, project, platform, package=None):
+        """Return all keys for all matching patterns for one specific package."""
+        if self.config is None:
+            return []
+
+        if package is None:
+            package = ArtConfiguration.ALL
+
+        if package not in self.config:
+            return []
+
+        keys = []
+        for pattern in self.config[package]:
+            if fnmatch.fnmatch(self.release_key(nightly_release, project, platform), pattern):
+                for key in self.config[package][pattern].keys():
+                    if key not in keys:
+                        keys.append(key)
+
+        return keys
+
+    def packages(self):
+        """Return all packages, including 'All', defined in the configuration."""
+        if self.config is None:
+            return []
+
+        return self.config.keys()
+
+    def get(self, nightly_release, project, platform, package, key, default_value=None):
+        """Return most specific value for specified key and matching pattern.
+
+        By specifying more specific release_keys in the file [/21.0/*...]
+        one can override less specific keys [/*/*...]
+        (order in the file is not important):
+
+        Tier0ChainTests:
+            /*/*/*:
+                dst: /yourlocaldirectory
+            /21.0/*/*:
+                dst: /eos/atlas/atlascerngroupdisk/data-art/grid-output
+
+        """
+        log = logging.getLogger(MODULE)
+        log.debug("Looking for %s %s %s %s %s", nightly_release, project, platform, package, key)
+        if self.config is None:
+            log.debug("No configuration")
+            return default_value
+
+        if package is None:
+            log.debug("%s used for package", ArtConfiguration.ALL)
+            package = ArtConfiguration.ALL
+
+        if package not in self.config:
+            log.debug("%s not in config", package)
+            return default_value
+
+        value = default_value
+        for pattern in sorted(self.config[package], self.release_key_compare):
+            release_key = self.release_key(nightly_release, project, platform)
+            log.debug("release_key %s", release_key)
+            # print key, pattern
+            if fnmatch.fnmatch(release_key, pattern):
+                log.debug("matched %s", pattern)
+                release = self.config[package][pattern]
+                if key in release:
+                    value = release[key]
+
+        log.debug("Value %s", value)
+        return value
+
+    def get_option(self, nightly_release, project, platform, package, key, option_key):
+        """TBD."""
+        value = self.get(nightly_release, project, platform, package, key)
+        return option_key + value if value is not None else ''
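Usage sketch for the new class. Given a hypothetical art-configuration.yml like the one shown in the `get()` docstring, the release key `/21.0/Athena/x86_64-slc6-gcc62-opt` matches both patterns, and the more specific `/21.0/*/*` is applied last (patterns are ordered by `release_key_compare`), so its `dst` wins:

```python
from art_configuration import ArtConfiguration

config = ArtConfiguration('art-configuration.yml')  # hypothetical file
print config.get('21.0', 'Athena', 'x86_64-slc6-gcc62-opt',
                 'Tier0ChainTests', 'dst')
# -> /eos/atlas/atlascerngroupdisk/data-art/grid-output
print config.keys('21.0', 'Athena', 'x86_64-slc6-gcc62-opt', 'Tier0ChainTests')
# -> ['dst']
```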
diff --git a/Tools/ART/python/ART/art_grid.py b/Tools/ART/python/ART/art_grid.py
index 43d034d81b3e708f0cdaba3476adf23c8c26ae90..ea938ebe9e3bb5b3c8cd49cd7c99458417e6d356 100644
--- a/Tools/ART/python/ART/art_grid.py
+++ b/Tools/ART/python/ART/art_grid.py
@@ -1,16 +1,18 @@
 #!/usr/bin/env python
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-"""TBD."""
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
+"""Class for grid submission."""

 __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>"

 import atexit
-import datetime
+import concurrent.futures
 import glob
 import json
 import logging
+import multiprocessing
 import os
 import re
+import requests
 import shutil
 import sys
 import tarfile
@@ -18,59 +20,70 @@ import tempfile
 import time
 import urllib2

-try:
-    import rucio.client
-    RUCIO = True
-except ImportError:
-    # NOTE: defer logging as level is not set yet
-    RUCIO = False
+from datetime import datetime
+from datetime import timedelta

 from art_base import ArtBase
+from art_configuration import ArtConfiguration
 from art_header import ArtHeader
-from art_misc import mkdir_p, make_executable, run_command
+from art_rucio import ArtRucio
+from art_misc import mkdir_p, make_executable, run_command, run_command_parallel

 MODULE = "art.grid"


+def copy_job(art_directory, indexed_package, dst):
+    """
+    Copy job to be run by executor.
+
+    Needs to be defined outside a class.
+    Names of arguments are important, see call to scheduler.
+    """
+    log = logging.getLogger(MODULE)
+    log.info("job started %s %s %s", art_directory, indexed_package, dst)
+    (exit_code, out, err, command, start_time, end_time) = run_command(' '.join((os.path.join(art_directory, './art.py'), "copy", "--dst=" + dst, indexed_package)))
+    log.info("job ended %s %s %s", art_directory, indexed_package, dst)
+
+    print "Exit Code:", exit_code
+    print "Out: ", out
+    print "Err: ", err
+
+    return (indexed_package, exit_code, out, err, start_time, end_time)
+
+
 class ArtGrid(ArtBase):
-    """TBD."""
+    """Class for grid submission."""

     CVMFS_DIRECTORY = '/cvmfs/atlas-nightlies.cern.ch/repo/sw'
     EOS_MGM_URL = 'root://eosatlas.cern.ch/'
     EOS_OUTPUT_DIR = '/eos/atlas/atlascerngroupdisk/data-art/grid-output'

-    LOG = '.log'
-    JSON = '_EXT0'
-    OUTPUT = '_EXT1'
-
     ARTPROD = 'artprod'
-    ART_JOB = 'art-job.json'
-    LOG_TGZ = 'log.tgz'
-    JOB_TAR = 'job.tar'
     JOB_REPORT = 'jobReport.json'
     JOB_REPORT_ART_KEY = 'art'
+    RESULT_WAIT_INTERVAL = 5 * 60

-    ATHENA_STDOUT = 'athena_stdout.txt'
-    RESULT_WAIT_INTERVAL = 300
-
-    def __init__(self, art_directory, nightly_release, project, platform, nightly_tag, script_directory=None, skip_setup=False, submit_directory=None):
-        """TBD."""
+    def __init__(self, art_directory, nightly_release, project, platform, nightly_tag, script_directory=None, skip_setup=False, submit_directory=None, max_jobs=0):
+        """Keep arguments."""
         super(ArtGrid, self).__init__(art_directory)
         self.nightly_release = nightly_release
+        self.nightly_release_short = re.sub(r"-VAL-.*", "-VAL", self.nightly_release)
         self.project = project
         self.platform = platform
         self.nightly_tag = nightly_tag
         self.script_directory = script_directory
         self.skip_setup = skip_setup
         self.submit_directory = submit_directory
-        self.rucio_cache = os.path.join(tempfile.gettempdir(), "rucio-cache")
+        self.max_jobs = multiprocessing.cpu_count() if max_jobs <= 0 else max_jobs
+
+        self.rucio = ArtRucio(self.art_directory, self.nightly_release_short, project, platform, nightly_tag)

     def status(self, status):
         """Print status for usage in gitlab-ci."""
         print 'art-status:', status

     def get_script_directory(self):
-        """On demand script directory, only to be called if directory exists."""
+        """Return calculated script directory, sometimes overridden by command line."""
         if self.script_directory is None:
             self.script_directory = ArtGrid.CVMFS_DIRECTORY
             self.script_directory = os.path.join(self.script_directory, self.nightly_release)  # e.g. 21.0
@@ -96,34 +109,44 @@ class ArtGrid(ArtBase):
             self.status('error')
             exit(1)

-    def copy_art(self, run_dir):
+    def exit_if_outfile_too_long(self, outfile_test):
+        """Exit with ERROR if outfile too long."""
+        log = logging.getLogger(MODULE)
+        MAX_OUTFILE_LEN = 132
+        if len(outfile_test) > MAX_OUTFILE_LEN:
+            log.error('OutFile string length > %d: %s', MAX_OUTFILE_LEN, outfile_test)
+            exit(1)
+
+    def copy_art(self, art_python, run_dir):
         """Copy all art files to the the run directory. Returns final script directory to be used."""
         log = logging.getLogger(MODULE)
         ART = os.path.join(run_dir, "ART")
         mkdir_p(ART)

         # get the path of the python classes and support scripts
-        art_python_directory = os.path.join(self.art_directory, '..', 'python', 'ART')
+        art_python_directory = os.path.join(self.art_directory, art_python, 'ART')

         shutil.copy(os.path.join(self.art_directory, 'art.py'), run_dir)
-        shutil.copy(os.path.join(self.art_directory, 'art-get-input.sh'), run_dir)
-        shutil.copy(os.path.join(self.art_directory, 'art-get-tar.sh'), run_dir)
+        shutil.copy(os.path.join(self.art_directory, 'art-diff.py'), run_dir)
         shutil.copy(os.path.join(self.art_directory, 'art-internal.py'), run_dir)
+        shutil.copy(os.path.join(self.art_directory, 'art-task-grid.sh'), run_dir)
+        shutil.copy(os.path.join(self.art_directory, 'art-download.sh'), run_dir)
         shutil.copy(os.path.join(art_python_directory, '__init__.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'art_base.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'art_build.py'), ART)
+        shutil.copy(os.path.join(art_python_directory, 'art_configuration.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'art_grid.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'art_header.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'art_misc.py'), ART)
+        shutil.copy(os.path.join(art_python_directory, 'art_rucio.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'docopt.py'), ART)
         shutil.copy(os.path.join(art_python_directory, 'docopt_dispatch.py'), ART)
-        shutil.copy(os.path.join(art_python_directory, 'parallelScheduler.py'), ART)
-        shutil.copy(os.path.join(art_python_directory, 'serialScheduler.py'), ART)

         make_executable(os.path.join(run_dir, 'art.py'))
-        make_executable(os.path.join(run_dir, 'art-get-input.sh'))
-        make_executable(os.path.join(run_dir, 'art-get-tar.sh'))
+        make_executable(os.path.join(run_dir, 'art-diff.py'))
         make_executable(os.path.join(run_dir, 'art-internal.py'))
+        make_executable(os.path.join(run_dir, 'art-task-grid.sh'))
+        make_executable(os.path.join(run_dir, 'art-download.sh'))

         script_directory = self.get_script_directory()
@@ -139,47 +162,14 @@ class ArtGrid(ArtBase):

     def get_jedi_id(self, text):
         """Return Jedi Task Id or 0."""
         match = re.search(r"jediTaskID=(\d+)", text)
-        return match.group(1) if match else 0
-
-    def get_nightly_release_short(self):
-        """Return a short version of the nightly release."""
-        return re.sub(r"-VAL-.*", "-VAL", self.nightly_release)
+        return match.group(1) if match else -1

-    def get_outfile(self, user, package, sequence_tag=0, test_name=None, nightly_tag=None):
-        """Create outfile from parameters."""
-        log = logging.getLogger(MODULE)
-
-        if nightly_tag is None:
-            nightly_tag = self.nightly_tag
-
-        if sequence_tag == 0:
-            if not RUCIO:
-                log.critical("RUCIO not available")
-                exit(1)
-
-            scope = '.'.join(('user', user))
-            outfile = '.'.join(('user', user, 'atlas', self.get_nightly_release_short(), self.project, self.platform, nightly_tag, '*', package, 'log'))
-            rucio_client = rucio.client.Client()
-            for out in rucio_client.list_dids(scope, {'name': outfile}):
-                outfile = os.path.splitext(out)[0]
-        else:
-            outfile = '.'.join(('user', user, 'atlas', self.get_nightly_release_short(), self.project, self.platform, nightly_tag, sequence_tag, package))
-        return outfile if test_name is None else '.'.join((outfile, test_name))
-
-    def copy(self, package, dst=None, user=None):
+    def copy(self, indexed_package, dst=None, user=None):
         """Copy output from scratch area to eos area."""
         log = logging.getLogger(MODULE)
-        real_user = os.getenv('USER', ArtGrid.ARTPROD)
-        user = real_user if user is None else user
-        default_dst = ArtGrid.EOS_OUTPUT_DIR if real_user == ArtGrid.ARTPROD else '.'
-        dst = default_dst if dst is None else dst

-        if package is not None:
-            log.info("Copy %s", package)
-            outfile = self.get_outfile(user, package)
-            log.info("Copying from %s", outfile)
-
-            return self.copy_output(outfile, dst)
+        if indexed_package is not None:
+            return self.copy_package(indexed_package, dst, user)

         # make sure script directory exist
         self.exit_if_no_script_directory()
@@ -191,288 +181,152 @@ class ArtGrid(ArtBase):

         # copy results for all packages
         result = 0
-        for package, root in test_directories.items():
+        for indexed_package, root in test_directories.items():
             number_of_tests = len(self.get_files(root, "grid", "all", self.nightly_release, self.project, self.platform))
             if number_of_tests > 0:
-                log.info("Copy %s", package)
-                outfile = self.get_outfile(user, package)
-                log.info("Copying from %s", outfile)
-
-                result |= self.copy_output(outfile, dst)
+                result |= self.copy_package(indexed_package, dst, user)

         return result

-    # Not used yet
-    def download(self, did):
-        """Download did into temp directory."""
-        log = logging.getLogger(MODULE)
-        if not RUCIO:
-            log.critical("RUCIO not available")
-            exit(1)
-
-        # rucio downloads cache properly
-        (exit_code, out, err) = run_command("rucio download --dir " + self.rucio_cache + " " + did)
-        if (exit_code != 0):
-            log.error(err)
-        log.info(out)
-        return exit_code
-
-    # Not used yet
-    def get_job_name(self, user, index, package, sequence_tag, nightly_tag):
-        """
-        Return job name for index.
-
-        job_name is without .sh or .py
-        """
-        log = logging.getLogger(MODULE)
-        if not RUCIO:
-            log.critical("RUCIO not available")
-            exit(1)
-
-        outfile = self.get_outfile(user, package, sequence_tag=sequence_tag, nightly_tag=nightly_tag)
-        log.debug("outfile %s", outfile)
-
-        container_json = outfile + ArtGrid.JSON
-        container_log = outfile + ArtGrid.LOG
-        log.info("Downloading json")
-        self.download(container_json)
-        log.info("Downloading log")
-        self.download(container_log)
-
-        index_formatted = index
-        indexed_json = os.path.join(container_json, '.'.join((container_json, sequence_tag, index_formatted, ArtGrid.JSON)))
-        log.debug("Looking for json")
-        if os.path.exists(indexed_json):
-            with open(indexed_json) as json_file:
-                info = json.load(json_file)
-            test_name = os.path.splitext(info['name'])[0]
-            return test_name
-
-        indexed_log = os.path.join(container_log, '.'.join((container_log, sequence_tag, index_formatted, ArtGrid.LOG_TGZ)))
-        log.debug("Looking for log")
-        if os.path.exists(indexed_log):
-            tar = tarfile.open(indexed_log)
-            for name in tar.getnames():
-                if ArtGrid.ATHENA_STDOUT in name:
-                    log.debug("Found %s", ArtGrid.ATHENA_STDOUT)
-                    info = tar.extractfile(name).read()
-                    # try art-job-name
-                    match = re.search(r"art-job-name:\s(\S+)", info)
-                    if match:
-                        log.debug("Found 'art-job-name'")
-                        return os.path.splitext(match.group(1))[0]
-
-                    # try Job Name
-                    match = re.search(r"Job Name:\s(\S+)", info)
-                    if match:
-                        log.debug("Found 'Job Name:'")
-                        return os.path.splitext(match.group(1))[0]
-
-        log.error("Cannot retrieve job_name from art-job.json or logfile")
-        return None
-
-    def get_test_name(self, rucio_name, rucio_log_name):
-        """Return test_name for log rucio_name."""
+    def copy_package(self, indexed_package, dst, user):
+        """Copy package to dst."""
         log = logging.getLogger(MODULE)
-        if not RUCIO:
-            log.critical("RUCIO not available")
-            exit(1)
-
-        tmp_dir = tempfile.mkdtemp()
-        atexit.register(shutil.rmtree, tmp_dir)
-
-        tmp_json = os.path.join(tmp_dir, ArtGrid.ART_JOB)
-
-        if rucio_name is not None:
-            (exit_code, out, err) = run_command(' '.join(('xrdcp -N -f ', rucio_name, tmp_json)))
-            if exit_code == 0:
-                log.debug("copied json %s", rucio_name)
-                with open(tmp_json) as json_file:
-                    info = json.load(json_file)
-                test_name = os.path.splitext(info['name'])[0]
-                return test_name
-
-        tmp_log = os.path.join(tmp_dir, ArtGrid.LOG_TGZ)
-
-        if rucio_log_name is not None:
-            (exit_code, out, err) = run_command(' '.join(('xrdcp -N -f ', rucio_log_name, tmp_log)))
-            if exit_code == 0:
-                log.debug("copied log %s %s", rucio_log_name, tmp_log)
-                tar = tarfile.open(tmp_log)
-                for name in tar.getnames():
-                    if ArtGrid.ATHENA_STDOUT in name:
-                        log.debug("Found %s", ArtGrid.ATHENA_STDOUT)
-                        info = tar.extractfile(name).read()
-                        # try art-job-name
-                        match = re.search(r"art-job-name:\s(\S+)", info)
-                        if match:
-                            log.debug("Found 'art-job-name'")
-                            return os.path.splitext(match.group(1))[0]
-
-                        # try Job Name
-                        match = re.search(r"Job Name:\s(\S+)", info)
-                        if match:
-                            log.debug("Found 'Job Name:'")
-                            return os.path.splitext(match.group(1))[0]
-
-        log.debug("Cannot retrieve job_name from art-job.json or logfile")
-        return None
+        real_user = os.getenv('USER', ArtGrid.ARTPROD)
+        user = real_user if user is None else user
+        default_dst = ArtGrid.EOS_OUTPUT_DIR if real_user == ArtGrid.ARTPROD else '.'
+        dst = default_dst if dst is None else dst

-    def copy_output(self, outfile, dst):
-        """Copy outfile to dst."""
-        log = logging.getLogger(MODULE)
-        if not RUCIO:
-            log.critical("RUCIO not available")
-            exit(1)
+        # for debugging
+        cleanup = True

         result = 0
-        outfile_pattern = r"([^\.]+)\.([^\.]+)\.([^\.]+)\.(.+)\.([^\.]+)\.([^\.]+)\.([^\.]+)\.([^\.]+)\.([^\.\n]+)"
-        match = re.search(outfile_pattern, outfile)
-        if not match:
-            log.error("%s does not match pattern", outfile)
-            return 1
-        (user_type, user, experiment, nightly_release, project, platform, nightly_tag, sequence_tag, package) = match.groups()
-        dst_dir = os.path.join(dst, nightly_release, nightly_tag, project, platform, package)
-        log.info(dst_dir)

-        scope = '.'.join((user_type, user))
+        package = indexed_package.split('.')[0]
+        dst_dir = os.path.join(dst, self.nightly_release, self.project, self.platform, self.nightly_tag, package)
+        log.info("dst_dir %s", dst_dir)

         tmp_dir = tempfile.mkdtemp()
-        atexit.register(shutil.rmtree, tmp_dir)
-
-        tmp_json = os.path.join(tmp_dir, ArtGrid.ART_JOB)
-        tmp_log = os.path.join(tmp_dir, ArtGrid.LOG_TGZ)
-        tmp_tar = os.path.join(tmp_dir, ArtGrid.JOB_TAR)
-
-        jsons = self.get_rucio_map(scope, outfile, ArtGrid.JSON)
-        logs = self.get_rucio_map(scope, outfile, ArtGrid.LOG)
-        tars = self.get_rucio_map(scope, outfile, ArtGrid.OUTPUT)
+        if cleanup:
+            atexit.register(shutil.rmtree, tmp_dir, ignore_errors=True)

-        # log.debug(jsons)
-        # log.debug(logs)
-
-        for number in tars:
+        for entry in self.rucio.get_table(user, indexed_package):
+            index = entry['grid_index']
+            log.debug("Index %d", index)
             # get the test name
-            rucio_name = jsons[number]['rucio_name'] if number in jsons else None
-            rucio_log_name = logs[number]['rucio_name'] if number in logs else None
-            test_name = self.get_test_name(rucio_name, rucio_log_name)
+            test_name = entry['job_name']
             if test_name is None:
-                log.error("JSON Lookup Error for test %s", rucio_name)
+                log.error("JSON Lookup Error for test %d", index)
                 result = 1
                 continue
+            log.debug("Test_name %s", test_name)

             # create tmp test directory
             test_dir = os.path.join(tmp_dir, test_name)
             mkdir_p(test_dir)

-            # copy art-job in, ignore error
-            run_command(' '.join(('xrdcp -N -f', rucio_name, tmp_json)))
-            shutil.copyfile(tmp_json, os.path.join(test_dir, ArtGrid.ART_JOB))
+            # copy art-job.json
+            result |= self.copy_json(os.path.join(tempfile.gettempdir(), entry['outfile'] + "_EXT0", self.__get_rucio_name(user, entry, 'json')), test_dir)

             # copy and unpack log
-            log_source = logs[number]['source']
-            (exit_code, out, err) = run_command(' '.join(('xrdcp -N -f', rucio_log_name, tmp_log)))
-            if exit_code != 0:
-                log.error("Log Unpack Error: %d %s %s", exit_code, out, err)
-                result = 1
-            else:
-                tar = tarfile.open(tmp_log)
-                for member in tar.getmembers():
-                    tar.extract(member, path=test_dir)
-                # does not work: tar.extractall()
-                tar.close()
-
-            log.info("Copying: %d %s", number, test_name)
-            log.info("- json: %s", jsons[number]['source'])
-            log.info("- log: %s", log_source)
-            log.info("- tar: %s", tars[number]['source'])
+            result |= self.copy_log(user, package, test_name, test_dir)

             # copy results and unpack
-            (exit_code, out, err) = run_command(' '.join(('xrdcp -N -f', tars[number]['rucio_name'], tmp_tar)))
-            if exit_code != 0:
-                log.error("TAR Error: %d %s %s", exit_code, out, err)
-                result = 1
-            else:
-                tar = tarfile.open(tmp_tar)
-                tar.extractall(path=test_dir)
-                tar.close()
+            result |= self.copy_results(user, package, test_name, test_dir)

             # copy to eos
-            dst_target = os.path.join(dst_dir, test_name)
-            log.info("to: %s", dst_target)
-            if dst_target.startswith('/eos'):
-                mkdir_cmd = 'eos ' + ArtGrid.EOS_MGM_URL + ' mkdir -p'
-                xrdcp_target = ArtGrid.EOS_MGM_URL + dst_target
-            else:
-                mkdir_cmd = 'mkdir -p'
-                xrdcp_target = dst_target
-
-            (exit_code, out, err) = run_command(' '.join((mkdir_cmd, dst_target)))
-            if exit_code != 0:
-                log.error("Mkdir Error: %d %s %s", exit_code, out, err)
-                result = 1
-            else:
-                (exit_code, out, err) = run_command(' '.join(('xrdcp -N -r -v', test_dir, xrdcp_target)))
-                if exit_code not in [0, 51, 54]:
-                    # 0 all is ok
-                    # 51 File exists
-                    # 54 is already copied
-                    log.error("XRDCP to EOS Error: %d %s %s", exit_code, out, err)
-                    result = 1
+            result |= self.copy_to_eos(index, test_name, test_dir, dst_dir)

             # cleanup
-            shutil.rmtree(test_dir)
+            if cleanup:
+                shutil.rmtree(test_dir)

         return result

-    def get_rucio_map(self, scope, outfile, extension):
-        """Return map of entries by grid_index into { source, rucio_name }."""
+    def copy_json(self, json_file, test_dir):
+        """Copy json."""
         log = logging.getLogger(MODULE)
-        if not RUCIO:
-            log.critical("RUCIO not available")
-            exit(1)
+        log.info("Copying JSON: %s", json_file)
+        shutil.copyfile(json_file, os.path.join(test_dir, ArtRucio.ART_JOB))
+        return 0

-        CERN = 'CERN-PROD_SCRATCHDISK'
-
-        LOG_PATTERN = r"\.(\d{6})\.log\.tgz"
-        JSON_PATTERN = r"\._(\d{6})\.art-job\.json"
-        OUTPUT_PATTERN = r"\._(\d{6})\.tar"
-        table = {}
-        rucio_client = rucio.client.Client()
-        log.debug("Looking for %s", outfile + extension)
-        for rep in rucio_client.list_replicas([{'scope': scope, 'name': outfile + extension}], schemes=['root']):
-            source = None
-            rucio_name = None
-            log.debug("Found in %s", rep['states'].keys())
-            # first look at CERN
-            if CERN in rep['states'].keys() and rep['states'][CERN] == 'AVAILABLE':
-                source = CERN
-                rucio_name = rep['rses'][CERN][0]
-            else:
-                for rse in rep['states'].keys():
-                    if rep['states'][rse] == 'AVAILABLE' and len(rep['rses'][rse]) >= 1:
-                        source = rse
-                        rucio_name = rep['rses'][rse][0]
-                        break
-
-            # maybe not found at all
-            if rucio_name is not None:
-                log.debug("Found rucio name %s in %s", rucio_name, source)
-                pattern = JSON_PATTERN if extension == ArtGrid.JSON else LOG_PATTERN if extension == ArtGrid.LOG else OUTPUT_PATTERN
-                match = re.search(pattern, rucio_name)
-                if match:
-                    number = int(match.group(1))
-                else:
-                    log.warning("%s does not contain test number using pattern %s skipped...", rucio_name, pattern)
-                    continue
+    def copy_log(self, user, package, test_name, test_dir):
+        """Copy and unpack log file."""
+        log = logging.getLogger(MODULE)
+        log.info("Copying LOG: %s %s", package, test_name)

-                table[number] = {'source': source, 'rucio_name': rucio_name}
+        tar = self.__open_tar(user, package, test_name, tar=False)
+        if tar is not None:
+            log.info("Unpacking LOG: %s", test_dir)
+            logdir = None
+            for member in tar.getmembers():
+                # does not work: tar.extractall()
+                tar.extract(member, path=test_dir)
+                logdir = member.name.split('/', 2)[0]
+
+            tar.close()

-        if not table:
-            log.warning("Outfile %s not found or empty", outfile + extension)
-        return table
+            # rename top level log dir to logs
+            if logdir is not None:
+                os.chdir(test_dir)
+                os.rename(logdir, "tarball_logs")
+        return 0
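`copy_log()` relies on the log tarball having a single top-level directory, and extracts member by member because `extractall()` reportedly misbehaved here. The same idea as a standalone sketch:

```python
import os
import tarfile

def unpack_and_rename_sketch(tar_path, test_dir, new_name='tarball_logs'):
    """Unpack tar_path into test_dir and rename its top-level directory."""
    tar = tarfile.open(tar_path)
    logdir = None
    for member in tar.getmembers():
        tar.extract(member, path=test_dir)  # member-wise extraction
        logdir = member.name.split('/', 2)[0]
    tar.close()
    if logdir is not None:
        os.rename(os.path.join(test_dir, logdir), os.path.join(test_dir, new_name))
```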
-    def task_package(self, root, package, job_type, sequence_tag, no_action):
-        """TBD."""
+    def copy_results(self, user, package, test_name, test_dir):
+        """Copy results and unpack."""
+        log = logging.getLogger(MODULE)
+        log.info("Copying TAR: %s %s", package, test_name)
+
+        tar = self.__open_tar(user, package, test_name)
+        if tar is not None:
+            log.info("Unpacking TAR: %s", test_dir)
+            tar.extractall(path=test_dir)
+            tar.close()
+        return 0
+
+    def copy_to_eos(self, index, test_name, test_dir, dst_dir):
+        """Copy to eos."""
+        log = logging.getLogger(MODULE)
+        dst_target = os.path.join(dst_dir, test_name)
+        if dst_target.startswith('/eos'):
+            # mkdir_cmd = 'eos ' + ArtGrid.EOS_MGM_URL + ' mkdir -p'
+            mkdir_cmd = None
+            xrdcp_target = ArtGrid.EOS_MGM_URL + dst_target + '/'
+        else:
+            mkdir_cmd = 'mkdir -p'
+            xrdcp_target = dst_target
+        log.info("Copying to DST: %d %s", index, xrdcp_target)
+
+        if mkdir_cmd is not None:
+            (exit_code, out, err, command, start_time, end_time) = run_command(' '.join((mkdir_cmd, dst_target)))
+            if exit_code != 0:
+                log.error("Mkdir Error: %d %s %s", exit_code, out, err)
+                return 1
+
+        cmd = ' '.join(('xrdcp -N -r -p -v', test_dir, xrdcp_target))
+        max_trials = 6
+        wait_time = 4 * 60  # seconds
+        trial = 1
+        while True:
+            log.info("Trial %d, using: %s", trial, cmd)
+            (exit_code, out, err, command, start_time, end_time) = run_command(cmd)
+            if exit_code in [0, 50, 51, 54]:
+                # 0 all is ok
+                # 50 File exists
+                # 51 File exists
+                # 54 is already copied
+                return 0
+
+            # 3010 connection problem
+            if exit_code != 3010 or trial >= max_trials:
+                log.error("XRDCP to EOS Error: %d %s %s", exit_code, out, err)
+                return 1
+
+            log.error("Possibly recoverable EOS Error: %d %s %s", exit_code, out, err)
+            log.info("Waiting for %d seconds", wait_time)
+            time.sleep(wait_time)
+            trial += 1
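The retry loop in `copy_to_eos()` above, reduced to its general form: a bounded number of trials with a fixed wait, succeeding on xrdcp's "already there" codes and retrying only on the transient connection error. A sketch under those assumptions:

```python
import time

def retry_sketch(run, ok_codes=(0, 50, 51, 54), transient_codes=(3010,),
                 max_trials=6, wait_time=4 * 60):
    """run() returns an exit code; retry it on transient failures only."""
    trial = 1
    while True:
        exit_code = run()
        if exit_code in ok_codes:
            return 0
        if exit_code not in transient_codes or trial >= max_trials:
            return 1
        time.sleep(wait_time)
        trial += 1
```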
+    def task_package(self, root, package, job_type, sequence_tag, no_action, config_file):
+        """Submit a single package."""
         log = logging.getLogger(MODULE)
         result = {}
         number_of_tests = len(self.get_files(root, job_type, "all", self.nightly_release, self.project, self.platform))
@@ -482,65 +336,112 @@ class ArtGrid(ArtBase):
         log.info('root %s', root)
         log.info('Handling %s for %s project %s on %s', package, self.nightly_release, self.project, self.platform)
         log.info("Number of tests: %d", number_of_tests)
-        submit_dir = os.path.join(self.submit_directory, package)
-        run_dir = os.path.join(submit_dir, "run")
-        script_directory = self.copy_art(run_dir)
+        run_dir = os.path.join(self.submit_directory, package, 'run')
+        script_directory = self.copy_art('../python', run_dir)

-        result = self.task(script_directory, package, job_type, sequence_tag, no_action)
+        result = self.task(script_directory, package, job_type, sequence_tag, no_action, config_file)
         return result

-    def task_list(self, job_type, sequence_tag, package=None, no_action=False, wait_and_copy=True):
-        """TBD."""
+    def task_list(self, job_type, sequence_tag, package=None, no_action=False, wait_and_copy=True, config_file=None):
+        """Submit a list of packages."""
         log = logging.getLogger(MODULE)
-        # job will be submitted from tmp directory
-        self.submit_directory = tempfile.mkdtemp(dir='.')

-        # make sure tmp is removed afterwards
-        atexit.register(shutil.rmtree, self.submit_directory)
+        test_copy = False

-        # make sure script directory exist
-        self.exit_if_no_script_directory()
+        if test_copy:
+            all_results = {}
+            all_results[0] = ('TrigAnalysisTest', "xxx", "yyy", 0)

-        # get the test_*.sh from the test directory
-        test_directories = self.get_test_directories(self.get_script_directory())
-        if not test_directories:
-            log.warning('No tests found in directories ending in "test"')
+        else:
+            # job will be submitted from tmp directory
+            self.submit_directory = tempfile.mkdtemp(dir='.')

-        all_results = {}
+            # make sure tmp is removed afterwards
+            atexit.register(shutil.rmtree, self.submit_directory, ignore_errors=True)
+
+            # make sure script directory exist
+            self.exit_if_no_script_directory()

-        if package is None:
-            config = None if self.skip_setup else self.get_config()
-            excluded_packages = config.get('excluded-packages', []) if config is not None else []
+            # get the test_*.sh from the test directory
+            test_directories = self.get_test_directories(self.get_script_directory())
+            if not test_directories:
+                log.warning('No tests found in directories ending in "test"')
+
+            configuration = None if self.skip_setup else ArtConfiguration(config_file)
+
+            all_results = {}

-            # submit tasks for all packages
-            for package, root in test_directories.items():
-                if package in excluded_packages:
-                    log.warning("Package %s is excluded", package)
-                else:
-                    all_results.update(self.task_package(root, package, job_type, sequence_tag, no_action))
-        else:
-            # Submit single package
-            root = test_directories[package]
-            all_results.update(self.task_package(root, package, job_type, sequence_tag, no_action))
+            if package is None:
+                # submit tasks for all packages
+                for package, root in test_directories.items():
+                    if configuration is not None and configuration.get(self.nightly_release, self.project, self.platform, package, 'exclude', False):
+                        log.warning("Package %s is excluded", package)
+                    else:
+                        all_results.update(self.task_package(root, package, job_type, sequence_tag, no_action, config_file))
+            else:
+                # Submit single package
+                root = test_directories[package]
+                all_results.update(self.task_package(root, package, job_type, sequence_tag, no_action, config_file))
+
+            if no_action:
+                log.info("--no-action specified, so not waiting for results")
+                return 0
+
+            if len(all_results) == 0:
+                log.warning('No tests found, nothing to submit.')
+                return 0

         # wait for all results
         if wait_and_copy:
+            configuration = ArtConfiguration(config_file)
+
+            log.info("Executor started with %d threads", self.max_jobs)
+            executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_jobs)
+            future_set = []
+
             while len(all_results) > 0:
-                time.sleep(ArtGrid.RESULT_WAIT_INTERVAL)
-                # force a cpy as we are modifying all_results
+                log.debug("No of Results %d", len(all_results))
+                log.debug("Waiting...")
+                if not test_copy:
+                    time.sleep(ArtGrid.RESULT_WAIT_INTERVAL)
+                log.debug("Done Waiting")
+
+                # force a copy of all_results since we are modifying all_results
                 for jedi_id in list(all_results):
+                    package = all_results[jedi_id][0]
+                    # skip packages without copy
+                    if not configuration.get(self.nightly_release, self.project, self.platform, package, "copy"):
+                        log.info("Copy not configured for %s - skipped", package)
+                        del all_results[jedi_id]
+                        continue
+
+                    log.debug("Checking package %s for %s", package, str(jedi_id))
                     status = self.task_status(jedi_id)
                     if status is not None:
                         log.info("JediID %s finished with status %s", str(jedi_id), status)
-                        if status == 'done':
-                            package = all_results[jedi_id][0]
-                            # FIXME limited
-                            if self.nightly_release in ['21.0', '21.0-mc16d'] and package in ['Tier0ChainTests']:
-                                log.info("Copy %s to eos area", package)
-                                self.copy(package)
+                        if status in ['finished', 'done']:
+                            # job_name = all_results[jedi_id][1]
+                            # outfile = all_results[jedi_id][2]
+                            index = all_results[jedi_id][3]
+                            dst = configuration.get(self.nightly_release, self.project, self.platform, package, "dst", ArtGrid.EOS_OUTPUT_DIR)
+                            indexed_package = package + ('.' + str(index) if index > 0 else '')
+                            log.info("Copy %s to %s", indexed_package, dst)
+                            future_set.append(executor.submit(copy_job, self.art_directory, indexed_package, dst))
                             del all_results[jedi_id]

+            # wait for all copy jobs to finish
+            log.info("Waiting for copy jobs to finish...")
+            for future in concurrent.futures.as_completed(future_set):
+                (indexed_package, exit_code, out, err, start_time, end_time) = future.result()
+                if exit_code == 0:
+                    log.info("Copied %s exit_code: %d", indexed_package, exit_code)
+                    log.info("  starting %s until %s", start_time.strftime('%Y-%m-%dT%H:%M:%S'), end_time.strftime('%Y-%m-%dT%H:%M:%S'))
+                else:
+                    log.error("Failed to copy: %s exit_code: %d", indexed_package, exit_code)
+                    print err
+                    print out
+
         return 0

     def task_status(self, jedi_id):
@@ -556,153 +457,238 @@ class ArtGrid(ArtBase):
             return "done"

         try:
-            r = urllib2.urlopen('https://bigpanda.cern.ch/task/' + str(jedi_id) + '?json=true')
+            url = 'https://bigpanda.cern.ch/task/' + str(jedi_id) + '?json=true'
+            r = urllib2.urlopen(url)
             s = json.load(r)
-            status = s['task']['superstatus']
-            if status in ["done", "finished", "failed", "aborted", "broken"]:
-                log.info("Task: %s %s", str(jedi_id), str(status))
-                return status
+            if (s is not None) and ('task' in s):
+                task = s['task']
+                if (task is not None) and ('status' in task):
+                    status = task['status']
+                    if status in ["done", "finished", "failed", "aborted", "broken"]:
+                        log.info("Task: %s %s", str(jedi_id), str(status))
+                        return status
         except urllib2.HTTPError, e:
-            log.error('%s for %s status', str(e.code), str(jedi_id))
+            log.error('%s for %s status: %s', str(e.code), str(jedi_id), url)
         return None

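`task_status()` polls the BigPanda monitor. A standalone query against the same endpoint, kept in the urllib2 style of the code above (the jedi_id value here is made up):

```python
import json
import urllib2

jedi_id = 13243546  # illustrative task id
url = 'https://bigpanda.cern.ch/task/' + str(jedi_id) + '?json=true'
s = json.load(urllib2.urlopen(url))
if (s is not None) and ('task' in s) and (s['task'] is not None):
    print s['task'].get('status')  # e.g. 'running', 'finished', 'done'
```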
-    def task(self, script_directory, package, job_type, sequence_tag, no_action=False):
+    def task_job(self, grid_options, sub_cmd, script_directory, sequence_tag, package, outfile, job_type='', number_of_tests=0, split=0, job_name='', inds='', n_files=0, in_file=False, ncores=1, no_action=False):
         """
-        Submit a task, consisting of multiple jobs.
+        Submit a single job.

-        For 'single' jobs each task contains exactly one job.
-        Returns a map of JediIds to tuples of (package, test_name)
+        Returns jedi_id or 0 if submission failed.
+
+        # art-task-grid.sh [--no-action] batch <submit_directory> <script_directory> <sequence_tag> <package> <outfile> <job_type> <number_of_tests>
+        #
+        # art-task-grid.sh [--no-action] single [--inds <input_file> --n-files <number_of_files> --split <split> --in] <submit_directory> <script_directory> <sequence_tag> <package> <outfile> <job_name>
         """
         log = logging.getLogger(MODULE)
+        cmd = ' '.join((os.path.join(self.art_directory, 'art-task-grid.sh'),
+                        '--no-action' if no_action else '',
+                        sub_cmd))
+
+        if sub_cmd == 'single':
+            cmd = ' '.join((cmd,
+                            '--inds ' + inds if inds != '' else '',
+                            '--n-files ' + str(n_files) if n_files > 0 else '',
+                            '--split ' + str(split) if split > 0 else '',
+                            '--in' if in_file else '',
+                            '--ncore ' + str(ncores) if ncores > 1 else ''))
+
+        cmd = ' '.join((cmd,
+                        self.submit_directory,
+                        script_directory,
+                        sequence_tag,
+                        package,
+                        outfile))
+
+        if sub_cmd == 'batch':
+            cmd = ' '.join((cmd,
+                            job_type,
+                            str(number_of_tests)))
+        elif sub_cmd == 'single':
+            cmd = ' '.join((cmd,
+                            job_name))
+        else:
+            log.critical("Invalid sub_cmd %s", sub_cmd)
+            exit(1)

-        log.info('Running art task')
+        log.info("cmd: %s", cmd)

-        config = None if self.skip_setup else self.get_config()
-        grid_options = self.grid_option(config, package, 'grid-exclude-sites', '--excludedSite=')
-        grid_options += ' ' + self.grid_option(config, package, 'grid-sites', '--site=')
-
         # run task from Bash Script as is needed in ATLAS setup
-        # FIXME we need to parse the output
+        log.info("Grid_options: %s", grid_options)
         env = os.environ.copy()
         env['PATH'] = '.:' + env['PATH']
         env['ART_GRID_OPTIONS'] = grid_options
+        log.info("ART_GRID_OPTIONS %s", env['ART_GRID_OPTIONS'])
+
+        jedi_id = -1
+        # run the command, no_action is forwarded and used inside the script
+        (exit_code, out, err, command, start_time, end_time) = run_command(cmd, env=env)
+        if exit_code != 0:
+            log.error("art-task-grid failed %d", exit_code)
+            print err
+        else:
+            jedi_id = 0 if no_action else self.get_jedi_id(err)
+        print out
+
+        log.info('jedi_id: %s', str(jedi_id))
+        return jedi_id
+
+    def get_grid_options(self, package, config_file):
+        """Return grid options for a package."""
+        log = logging.getLogger(MODULE)
+        if self.skip_setup:
+            return ''
+
+        configuration = ArtConfiguration(config_file)
+        grid_options = configuration.get_option(self.nightly_release, self.project, self.platform, package, 'exclude-sites', '--excludedSite=')
+        grid_options += ' ' + configuration.get_option(self.nightly_release, self.project, self.platform, package, 'sites', '--site=')
+        log.info('grid_options: %s', grid_options)
+        return grid_options

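`get_grid_options()` turns configuration values into submission options via `get_option()`, which prefixes the flag only when a value is configured. For illustration, with a hypothetical configuration entry (package and site names made up):

```python
# assumed entry in art-configuration.yml:
#
#   MyPackage:
#       /21.0/*/*:
#           exclude-sites: ANALY_SITE_A,ANALY_SITE_B
#
def get_option_sketch(value, option_key):
    # mirrors ArtConfiguration.get_option(): prefix only when configured
    return option_key + value if value is not None else ''

print get_option_sketch('ANALY_SITE_A,ANALY_SITE_B', '--excludedSite=')
# -> --excludedSite=ANALY_SITE_A,ANALY_SITE_B
print repr(get_option_sketch(None, '--site='))
# -> ''
```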
+    def task(self, script_directory, package, job_type, sequence_tag, no_action=False, config_file=None):
+        """
+        Submit a task, consisting of multiple jobs.
+
+        For 'single' jobs each task contains exactly one job.
+        Returns a map of jedi_id to (package, test_name, out_file)
+        """
+        log = logging.getLogger(MODULE)
+        log.info('Running art task')
+
+        grid_options = self.get_grid_options(package, config_file)
+
         test_directories = self.get_test_directories(self.get_script_directory())
         test_directory = test_directories[package]
         number_of_batch_tests = len(self.get_files(test_directory, job_type, "batch", self.nightly_release, self.project, self.platform))

-        MAX_OUTFILE_LEN = 132
-        user = env['USER'] if self.skip_setup else ArtGrid.ARTPROD
-        outfile = self.get_outfile(user, package, sequence_tag)
+        user = os.getenv('USER', 'artprod') if self.skip_setup else ArtGrid.ARTPROD
+        outfile = self.rucio.get_outfile_name(user, package, sequence_tag)

         result = {}

         # submit batch tests
         if number_of_batch_tests > 0:
-            if len(outfile) > MAX_OUTFILE_LEN:
-                log.error("OutFile string length > %d: ", MAX_OUTFILE_LEN, outfile)
-                return 1
+            self.exit_if_outfile_too_long(outfile)

             # Batch
-            cmd = ' '.join((os.path.join(self.art_directory, 'art-task-grid.sh'), '--skip-setup' if self.skip_setup else '', self.submit_directory, script_directory, package, job_type, sequence_tag, str(number_of_batch_tests), self.get_nightly_release_short(), self.project, self.platform, self.nightly_tag, outfile))
-            log.info("batch: %s", cmd)
-
-            if not no_action:
-                (exit_code, out, err) = run_command(cmd, env=env)
-                if exit_code != 0:
-                    log.error("art-task-grid failed %d", exit_code)
-                    print out
-                    print err
-                else:
-                    jediID = self.get_jedi_id(err)
-                    if jediID > 0:
-                        result[jediID] = (package, "", outfile)
-
-                log.info(out)
+            log.info("Batch")
+            jedi_id = self.task_job(grid_options, "batch", script_directory, sequence_tag, package, outfile, job_type=job_type, number_of_tests=number_of_batch_tests, no_action=no_action)
+            if jedi_id > 0:
+                result[jedi_id] = (package, "", outfile, 0)

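Single tests carry their grid parameters in the test file's header, read through `ArtHeader` (whose implementation is not part of this diff). The keys used in the loop below would appear in a test_*.sh header roughly as `# art-input:`, `# art-input-nfiles:`, `# art-input-split:` and `# art-cores:` lines; a minimal parser for such lines, as an assumption-labeled sketch:

```python
import re

def read_art_header_sketch(path):
    """Collect '# art-xxx: value' header lines from a test script (sketch)."""
    headers = {}
    with open(path) as f:
        for line in f:
            match = re.match(r"#\s*(art-[\w-]+):\s*(.+)", line)
            if match:
                headers[match.group(1)] = match.group(2).strip()
    return headers
```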
jedi_id = self.task_job(grid_options, "single", script_directory, sequence_tag, package, outfile_test, split=split, job_name=job_name, inds=inds, n_files=n_files, in_file=True, ncores=ncores, no_action=no_action) - log.info(out) + if jedi_id > 0: + result[jedi_id] = (package, job_name, outfile_test, index) index += 1 return result - def job(self, package, job_type, sequence_tag, index_type, index_or_name, out): - """TBD.""" + def batch(self, sequence_tag, package, out, job_type, job_index): + """Run a single job by job_index of a 'batch' submission.""" + log = logging.getLogger(MODULE) + log.info('Running art grid batch') + log.info("%s %s %s %s %s %s %s %s", self.nightly_release, self.project, self.platform, self.nightly_tag, package, job_type, str(job_index), out) + + test_directories = self.get_test_directories(self.get_script_directory()) + test_directory = test_directories[package] + + test_list = self.get_files(test_directory, job_type, "batch", self.nightly_release, self.project, self.platform) + + # NOTE: grid counts from 1 + index = int(job_index) + job_name = test_list[index - 1] + + in_file = None + + return self.job(test_directory, package, job_name, job_type, out, in_file) + + def single(self, sequence_tag, package, out, job_name, in_file): + """Run a single job by name of a 'single' submission.""" log = logging.getLogger(MODULE) - log.info('Running art job grid') - log.info("%s %s %s %s %s %s %s %s", self.nightly_release, self.project, self.platform, self.nightly_tag, package, job_type, str(index_or_name), out) + log.info('Running art grid single') + log.info("%s %s %s %s %s %s %s %s", self.nightly_release, self.project, self.platform, self.nightly_tag, package, job_name, out, in_file) test_directories = self.get_test_directories(self.get_script_directory()) test_directory = test_directories[package] - if index_type == "batch": - test_list = self.get_files(test_directory, job_type, "batch", self.nightly_release, self.project, self.platform) - # minus one for grid - index = int(index_or_name) - test_name = test_list[index - 1] - else: - test_name = index_or_name + job_type = 'grid' + return self.job(test_directory, package, job_name, job_type, out, in_file) + + def job(self, test_directory, package, job_name, job_type, out, in_file): + """Run a job.""" + log = logging.getLogger(MODULE) + + # informing panda, ignoring errors for now + panda_id = os.getenv('PandaID', '0') - log.info("art-job-name: %s", test_name) + log.info("art-job-name: %s", job_name) - test_file = os.path.join(test_directory, test_name) - # arguments are SCRIPT_DIRECTORY, PACKAGE, TYPE, TEST_NAME, NIGHTLY_RELEASE, PROJECT, PLATFORM, NIGHTLY_TAG - command = ' '.join((test_file, self.get_script_directory(), package, job_type, test_name, self.nightly_release, self.project, self.platform, self.nightly_tag)) + test_file = os.path.join(test_directory, job_name) - log.debug(test_name) + # Tests are called with arguments: PACKAGE TEST_NAME SCRIPT_DIRECTORY TYPE [IN_FILE] + script_directory = self.get_script_directory() + command = ' '.join((test_file, package, job_name, script_directory, job_type, in_file if in_file is not None else '')) + + log.debug(job_name) log.debug(test_directory) log.debug(command) # run the test env = os.environ.copy() env['PATH'] = '.:' + env['PATH'] - (exit_code, output, error) = run_command(command, env=env) + env['ArtScriptDirectory'] = script_directory + env['ArtPackage'] = package + env['ArtJobType'] = job_type + env['ArtJobName'] = job_name + if in_file is not None: + env['ArtInFile'] 
= in_file + + header = ArtHeader(test_file) + ncores = header.get(ArtHeader.ART_CORES) + if ncores > 1: + nthreads = header.get(ArtHeader.ART_INPUT_NFILES) + (exit_code, output, error, command, start_time, end_time) = run_command_parallel(command, nthreads, ncores, env=env) + else: + (exit_code, output, error, command, start_time, end_time) = run_command(command, env=env) print output if (exit_code != 0): - log.error("Test %s failed %d", str(index_or_name), exit_code) + log.error("Test %s failed %d", job_name, exit_code) print error # NOTE: exit_code always 0 print error # gather results result = {} - result['name'] = test_name + result['name'] = job_name result['exit_code'] = exit_code result['test_directory'] = test_directory - result['result'] = self.get_art_results(output) + result['result'] = ArtBase.get_art_results(output) + result['panda_id'] = panda_id # write out results - with open(os.path.join(ArtGrid.ART_JOB), 'w') as jobfile: + with open(os.path.join(ArtRucio.ART_JOB), 'w') as jobfile: json.dump(result, jobfile, sort_keys=True, indent=4, ensure_ascii=False) - log.info("Wrote %s", ArtGrid.ART_JOB) + log.info("Wrote %s", ArtRucio.ART_JOB) # grab the content of "jobReport.json", add the art dictionary and write it back if os.path.isfile(ArtGrid.JOB_REPORT): @@ -714,6 +700,12 @@ class ArtGrid(ArtBase): json.dump(info, json_file, sort_keys=True, indent=4, ensure_ascii=False) json_file.truncate() log.info("Updated %s", ArtGrid.JOB_REPORT) + else: + with open(ArtGrid.JOB_REPORT, 'w') as json_file: + info = {} + info[ArtGrid.JOB_REPORT_ART_KEY] = result + json.dump(info, json_file, sort_keys=True, indent=4, ensure_ascii=False) + log.info("Updated %s", ArtGrid.JOB_REPORT) # pick up the outputs tar_file = tarfile.open(out, mode='w') @@ -732,7 +724,7 @@ class ArtGrid(ArtBase): tar_file.add(out_name) # pick up art-header named outputs - for path_name in ArtHeader(test_file).get('art-output'): + for path_name in ArtHeader(test_file).get(ArtHeader.ART_OUTPUT): for out_name in glob.glob(path_name): log.info('Tar file contains: %s', out_name) tar_file.add(out_name) @@ -741,48 +733,23 @@ class ArtGrid(ArtBase): # Always return 0 return 0 - def get_grid_map(self, user, package, sequence_tag=0, nightly_tag=None): - """Return grid map of test_name to grid_index.""" - log = logging.getLogger(MODULE) - scope = '.'.join(('user', user)) - - outfile = self.get_outfile(user, package, sequence_tag=sequence_tag, nightly_tag=nightly_tag) - log.debug("outfile %s", outfile) - jsons = self.get_rucio_map(scope, outfile, ArtGrid.JSON) - logs = self.get_rucio_map(scope, outfile, ArtGrid.LOG) - - result = {} - for grid_index in logs: - rucio_name = jsons[grid_index]['rucio_name'] if grid_index in jsons else None - rucio_log_name = logs[grid_index]['rucio_name'] if grid_index in logs else None - test_name = self.get_test_name(rucio_name, rucio_log_name) - if test_name is None: - # log.warning("JSON Lookup failed for test %s", rucio_log_name if rucio_name is None else rucio_name) - continue - - result[test_name] = int(grid_index) - return result - - def list(self, package, job_type, index_type, json_format, user, nogrid): - """TBD.""" - log = logging.getLogger(MODULE) + def list(self, package, job_type, index_type, json_format, user): + """List all jobs available.""" user = ArtGrid.ARTPROD if user is None else user # make sure script directory exist self.exit_if_no_script_directory() - if not nogrid: - log.info("Getting grid map...") - grid_map = self.get_grid_map(user, package) - - log.info("Getting test 
names...") - test_names = self.get_list(self.get_script_directory(), package, job_type, index_type) json_array = [] - for test_name in test_names: - name = os.path.splitext(test_name)[0] + for entry in self.rucio.get_table(user, package): + # print entry json_array.append({ - 'name': name, - 'grid_index': str(grid_map[name]) if not nogrid and name in grid_map else '-1' + 'name': entry['job_name'], + 'grid_index': entry['grid_index'], + 'job_index': entry['job_index'], + 'single_index': entry['single_index'], + 'file_index': entry['file_index'], + 'outfile': entry['outfile'] }) if json_format: @@ -790,33 +757,44 @@ class ArtGrid(ArtBase): return 0 i = 0 + print "Example FileName: user.artprod.atlas.21.0.Athena.x86_64-slc6-gcc62-opt.2018-02-25T2154.314889.TrigInDetValidation.<Single>" + print "Example OutputName: user.artprod.<Job>.EXT1._<Grid>.tar.<File>" + print + print '{:-^5}'.format('Index'), \ + '{:-^60}'.format('Name'), \ + '{:-^6}'.format('Grid'), \ + '{:-^9}'.format('Job'), \ + '{:-^6}'.format('Single'), \ + '{:-^4}'.format('File'), \ + '{:-^80}'.format('FileName') + for entry in json_array: - print str(i) + ' ' + entry['name'] + (' ' + entry['grid_index']) + print '{:5d}'.format(i), \ + '{:60}'.format('None' if entry['name'] is None else entry['name']), \ + '{:06d}'.format(entry['grid_index']), \ + '{:9d}'.format(entry['job_index']), \ + '{:6d}'.format(entry['single_index']), \ + '{:4d}'.format(entry['file_index']), \ + '{:80}'.format(entry['outfile']) i += 1 - # print warnings - if not nogrid: - for entry in json_array: - if entry['grid_index'] < 0: - log.warning('test %s could not be found in json or log', entry['name']) - return 0 def log(self, package, test_name, user): - """TBD.""" + """Print the log of a job.""" log = logging.getLogger(MODULE) user = ArtGrid.ARTPROD if user is None else user # make sure script directory exist self.exit_if_no_script_directory() - tar = self.open_tar(user, package, test_name, ArtGrid.LOG) + tar = self.__open_tar(user, package, test_name, tar=False) if tar is None: log.error("No log tar file found") return 1 for name in tar.getnames(): - if ArtGrid.ATHENA_STDOUT in name: + if ArtRucio.ATHENA_STDOUT in name: f = tar.extractfile(name) content = f.read() print content @@ -825,18 +803,22 @@ class ArtGrid(ArtBase): return 0 def output(self, package, test_name, user): - """TBD.""" + """Download the output of a job.""" log = logging.getLogger(MODULE) user = ArtGrid.ARTPROD if user is None else user # make sure script directory exist self.exit_if_no_script_directory() - outfile = self.get_outfile(user, package) - tar_dir = os.path.join(tempfile.gettempdir(), outfile + ArtGrid.OUTPUT) + outfile = self.rucio.get_outfiles(user, package)[0] + if not outfile.endswith(package): + # remove .13 + outfile = os.path.splitext(outfile)[0] + job_name = os.path.splitext(test_name)[0] + tar_dir = os.path.join(tempfile.gettempdir(), outfile, job_name) mkdir_p(tar_dir) - tar = self.open_tar(user, package, test_name, ArtGrid.OUTPUT) + tar = self.__open_tar(user, package, test_name) if tar is None: log.error("No output tar file found") return 1 @@ -844,16 +826,16 @@ class ArtGrid(ArtBase): tar.extractall(path=tar_dir) tar.close() print "Output extracted in", tar_dir + return 0 - def compare(self, package, test_name, days, file_names, user): - """TBD.""" + def compare(self, package, test_name, days, user, entries=-1, shell=False): + """Compare current output against a job of certain days ago.""" log = logging.getLogger(MODULE) user = ArtGrid.ARTPROD if user is None else 
user previous_nightly_tag = self.get_previous_nightly_tag(days) log.info("LOG Previous Nightly Tag: %s", str(previous_nightly_tag)) - print "PRINT Previous Nightly Tag", str(previous_nightly_tag) if previous_nightly_tag is None: log.error("No previous nightly tag found") @@ -862,88 +844,65 @@ class ArtGrid(ArtBase): ref_dir = os.path.join('.', 'ref-' + previous_nightly_tag) mkdir_p(ref_dir) - tar = self.open_tar(user, package, test_name, ArtGrid.OUTPUT, previous_nightly_tag) + log.info("Shell = %s", shell) + tar = self.__open_tar(user, package, test_name, nightly_tag=previous_nightly_tag, shell=shell) if tar is None: log.error("No comparison tar file found") return 1 for member in tar.getmembers(): - if member.name in file_names: - tar.extractall(path=ref_dir, members=[member]) + tar.extractall(path=ref_dir, members=[member]) tar.close() - result = 0 - for file_name in file_names: - ref_file = os.path.join(ref_dir, file_name) - if os.path.isfile(ref_file): - print "art-compare:", previous_nightly_tag, file_name - result |= self.compare_ref(file_name, ref_file, 10) - else: - log.error("%s not found in tar file", ref_file) - result = 1 - return result - - def grid_option(self, config, package, key, option_key): - """Based on config, return value for key, or ''. + return self.compare_ref('.', ref_dir, entries) - A global value is pre-pended if found. If not local value is found only the global value is returned, or ''. - """ - if config is None: - return '' - - global_value = config.get(key) - if package not in config.keys(): - return '' if global_value is None else option_key + global_value - - value = config.get(package).get(key) - - if global_value is None: - return '' if value is None else option_key + value - else: - return option_key + global_value + ('' if value is None else ', ' + value) - - def open_tar(self, user, package, test_name, extension, nightly_tag=None): + def __open_tar(self, user, package, test_name, tar=True, nightly_tag=None, shell=False): """Open tar file for particular release.""" log = logging.getLogger(MODULE) - if not RUCIO: - log.critical("RUCIO not available") - exit(1) + log.info("Tar: %s", tar) + nightly_tag = self.nightly_tag if nightly_tag is None else nightly_tag + job_name = os.path.splitext(test_name)[0] - log.info("Getting grid map...") - grid_map = self.get_grid_map(user, package, nightly_tag=nightly_tag) + for entry in self.rucio.get_table(user, package, nightly_tag, shell): + if entry['job_name'] == job_name: - name = os.path.splitext(test_name)[0] - if name not in grid_map: - log.error("No log or tar found for package %s or test %s", package, test_name) - return None + rucio_name = self.__get_rucio_name(user, entry, 'tar' if tar else 'log') - grid_index = grid_map[name] - log.info("Grid Index: %d", grid_index) + log.info("RUCIO: %s", rucio_name) - scope = '.'.join(('user', user)) - outfile = self.get_outfile(user, package, nightly_tag=nightly_tag) - rucio_map = self.get_rucio_map(scope, outfile, extension) - if grid_index not in rucio_map: - log.error("No entry in rucio map for %d", grid_index) - return None + # tmp_dir = tempfile.gettempdir() + tmp_dir = tempfile.mkdtemp() + atexit.register(shutil.rmtree, tmp_dir, ignore_errors=True) - rucio_name = rucio_map[grid_index]['rucio_name'] - log.info("RUCIO: %s", rucio_name) + log.info("Shell = %s", shell) + exit_code = self.rucio.download(rucio_name, tmp_dir, shell) + if exit_code == 0: + tmp_tar = os.path.join(tmp_dir, 'user.' 
+ user, rucio_name) + return tarfile.open(tmp_tar) - tmp_dir = tempfile.mkdtemp() - atexit.register(shutil.rmtree, tmp_dir) + log.error("No log or tar found for package %s or test %s", package, test_name) + return None - tmp_tar = os.path.join(tmp_dir, os.path.basename(rucio_name)) + def __get_rucio_name(self, user, entry, file_type): + rucio_name = None + if file_type == 'json': + rucio_name = '.'.join(('user', user, str(entry['job_index']), 'EXT0', '_{0:06d}'.format(entry['grid_index']), 'art-job', 'json')) + elif file_type == 'tar': + rucio_name = '.'.join(('user', user, str(entry['job_index']), 'EXT1', '_{0:06d}'.format(entry['grid_index']), 'tar')) + else: + rucio_name = '.'.join((entry['outfile'], 'log', str(entry['job_index']), '{0:06d}'.format(entry['grid_index']), 'log.tgz')) - (exit_code, out, err) = run_command(' '.join(('xrdcp -N -f', rucio_name, tmp_dir))) - if exit_code != 0: - log.error("TAR Error: %s %d %s %s", rucio_name, exit_code, out, err) - return None + if entry['file_index'] > 0: + rucio_name = '.'.join((rucio_name, str(entry['file_index']))) - return tarfile.open(tmp_tar) + return rucio_name def get_previous_nightly_tag(self, days): - """TBD. 21:00 is cutoff time.""" + """ + Return the nightly tag of given days ago. + + 21:00 is the cutoff time. Any submission before 21:00 counts as the previous day. + """ directory = os.path.join(ArtGrid.CVMFS_DIRECTORY, self.nightly_release) tags = os.listdir(directory) tags.sort(reverse=True) @@ -955,13 +914,24 @@ class ArtGrid(ArtBase): elif found: # check this is within days... (cutoff is 21:00, just move by 3 hours to get full days) fmt = '%Y-%m-%dT%H%M' - offset = datetime.timedelta(hours=3) - nightly_tag_dt = datetime.datetime.strptime(self.nightly_tag, fmt) + offset - from_dt = nightly_tag_dt.replace(hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=days) - to_dt = from_dt + datetime.timedelta(days=1) - tag_dt = datetime.datetime.strptime(tag, fmt) + offset + offset = timedelta(hours=3) + nightly_tag_dt = datetime.strptime(self.nightly_tag, fmt) + offset + from_dt = nightly_tag_dt.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=days) + to_dt = from_dt + timedelta(days=1) + tag_dt = datetime.strptime(tag, fmt) + offset within_days = from_dt <= tag_dt and tag_dt < to_dt target_exists = len(glob.glob(os.path.join(directory, tag, self.project, '*', 'InstallArea', self.platform))) > 0 if within_days and target_exists: return tag return None + + def createpoolfile(self): + """Create 'empty' poolfile catalog.""" + path = os.path.join('.', 'PoolFileCatalog.xml') + with open(path, 'w+') as pool_file: + pool_file.write('<!-- Edited By POOL -->\n') + pool_file.write('<!DOCTYPE POOLFILECATALOG SYSTEM "InMemory">\n') + pool_file.write('<POOLFILECATALOG>\n') + pool_file.write('</POOLFILECATALOG>\n') + + return 0 diff --git a/Tools/ART/python/ART/art_header.py b/Tools/ART/python/ART/art_header.py index 23335e4741c1b105ed20e84c283fdcdbebfbbea4..737c377f956e7db8595f667ba5e1aebd9df1e739 100644 --- a/Tools/ART/python/ART/art_header.py +++ b/Tools/ART/python/ART/art_header.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -"""TBD.""" +# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration +"""Class to handle art-headers.""" __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>" @@ -15,9 +15,10 @@ MODULE = "art.header" class ArtHeader(object): - """TBD.""" + """Class to handle art-headers.""" ART_CI = 'art-ci' + 
ART_CORES = 'art-cores' ART_DESCRIPTION = 'art-description' ART_INCLUDE = 'art-include' ART_INPUT = 'art-input' @@ -27,11 +28,11 @@ class ArtHeader(object): ART_TYPE = 'art-type' def __init__(self, filename): - """TBD.""" + """Keep arguments, setup patterns for re-use, define possible art-header definitions.""" self.header_format = re.compile(r'#\s(art-[\w-]+):\s+(.+)$') - self.header_format_error1 = re.compile(r'#(art-[\w-]+):\s*(.+)$') - self.header_format_error2 = re.compile(r'#\s\s+(art-[\w-]+):\s*(.+)$') - self.header_format_error3 = re.compile(r'#\s(art-[\w-]+):\S(.*)$') + self.header_format_error1 = re.compile(r'#(art-[\w-]*):\s*(.+)$') + self.header_format_error2 = re.compile(r'#\s\s+(art-[\w-]*):\s*(.+)$') + self.header_format_error3 = re.compile(r'#\s(art-[\w-]*):\S(.*)$') self.filename = filename @@ -46,6 +47,7 @@ class ArtHeader(object): self.add(ArtHeader.ART_CI, ListType, []) # "grid" type only + self.add(ArtHeader.ART_CORES, IntType, 1) self.add(ArtHeader.ART_OUTPUT, ListType, []) self.add(ArtHeader.ART_INPUT, StringType, None) self.add(ArtHeader.ART_INPUT_NFILES, IntType, 1) @@ -54,7 +56,7 @@ class ArtHeader(object): self.read(filename) def add(self, key, value_type, default_value=None, constraint=None): - """TBD.""" + """Add a single header definition.""" self.header[key] = {} self.header[key]['type'] = value_type self.header[key]['default'] = default_value @@ -62,7 +64,7 @@ class ArtHeader(object): self.header[key]['value'] = None # e.g. the value was never set def is_list(self, key): - """TBD.""" + """Return true if key exists and is of ListType.""" return self.header[key]['type'] is ListType if key in self.header else False def read(self, filename): @@ -89,13 +91,21 @@ class ArtHeader(object): # handle values if key not in self.header: log.warning("Unknown art-header %s: %s in file %s", key, value, filename) - self.header[key] = {} - self.header[key]['value'] = value + self.add(key, StringType) + if self.header[key]['value'] is None: + self.header[key]['value'] = value + else: + log.warning("key %s: already set to %s in file %s", key, self.header[key]['value'], filename) except ValueError: log.error("Invalid value in art-header %s: %s in file %s", key, value, filename) def get(self, key): - """TBD.""" + """ + Get the value of a header by key. + + Return default if header not specified. + Warn and return None if header is not defined. 
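Because the art-header grammar is strict (exactly one space after the '#', a colon directly after the key), a short sketch may help; it is illustrative only, reuses the accepted pattern compiled in __init__ above, and the example lines are invented.

    # Illustrative only: well-formed headers match; the two malformed
    # lines are the kind caught by the header_format_error patterns.
    import re

    HEADER = re.compile(r'#\s(art-[\w-]+):\s+(.+)$')

    example = [
        '# art-description: Example reconstruction test',
        '# art-type: grid',
        '# art-cores: 4',
        '#art-input: no space after the hash',
        '#  art-output: two spaces after the hash',
    ]

    for line in example:
        match = HEADER.match(line)
        if match:
            print match.group(1), '->', match.group(2)
    # only the first three, well-formed headers are printed

The matched value is then converted according to the type declared in add() (ART_CORES is an IntType), which is why read() catches ValueError.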
+ """ log = logging.getLogger(MODULE) if key not in self.header: log.warning("Art seems to look for a header key %s which is not in the list of defined headers.", key) @@ -107,7 +117,7 @@ class ArtHeader(object): return self.header[key]['value'] def print_it(self): - """TBD.""" + """Print content of the headers for this file.""" log = logging.getLogger(MODULE) for key in self.header: log.info("%s: %s %s %s %s", key, self.header[key]['type'], self.header[key]['default'], self.header[key]['value'], self.header[key]['constraint']) diff --git a/Tools/ART/python/ART/art_misc.py b/Tools/ART/python/ART/art_misc.py index d81f3440599e4219e12ac519863f43bc10c3bc95..41e4707f5309c49bc23d2e67e8356057084a1e13 100644 --- a/Tools/ART/python/ART/art_misc.py +++ b/Tools/ART/python/ART/art_misc.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -"""TBD.""" +# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration +"""Miscellaneous functions.""" __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>" +import concurrent.futures import errno import logging import os @@ -11,11 +12,13 @@ import shlex import subprocess import sys +from datetime import datetime + MODULE = "art.misc" def set_log(kwargs): - """TBD.""" + """Set the default log level and message format depending on --verbose or --quiet options.""" level = logging.DEBUG if kwargs['verbose'] else logging.WARN if kwargs['quiet'] else logging.INFO log = logging.getLogger("art") log.setLevel(level) @@ -30,15 +33,31 @@ def set_log(kwargs): log.propagate = False -def run_command(cmd, dir=None, shell=False, env=None): +def get_atlas_env(): + """Get all environment variables.""" + log = logging.getLogger(MODULE) + try: + nightly_release = os.environ['AtlasBuildBranch'] + project = os.environ['AtlasProject'] + platform = os.environ[project + '_PLATFORM'] + nightly_tag = os.environ['AtlasBuildStamp'] + return (nightly_release, project, platform, nightly_tag) + except KeyError, e: + log.critical("Environment variable not set %s", e) + sys.exit(1) + + +def run_command(cmd, dir=None, shell=False, env=None, verbose=True): """ Run the given command locally. The command runs as separate subprocesses for every piped command. Returns tuple of exit_code, output and err. """ - log = logging.getLogger(MODULE) - log.debug("Execute: %s", cmd) + # leave at print for basic debugging, log sometimes lost + start_time = datetime.now() + if verbose: + print "Execute:", cmd if "|" in cmd: cmd_parts = cmd.split('|') else: @@ -55,13 +74,89 @@ def run_command(cmd, dir=None, shell=False, env=None): i = i + 1 (output, err) = p[i - 1].communicate() exit_code = p[0].wait() + end_time = datetime.now() + + return (exit_code, str(output), str(err), cmd, start_time, end_time) + + +def run_command_parallel(cmd, nthreads, ncores, dir=None, shell=False, env=None, verbose=True): + """ + Run the given command locally in parallel. + + The command runs as separate subprocesses for every piped command. + Returns tuple of exit_code, output and err. 
+ """ + start_time = datetime.now() + log = logging.getLogger(MODULE) + ncores = min(ncores, nthreads) - return exit_code, str(output), str(err) + if env is None: + env = os.environ.copy() + env['ArtThreads'] = str(nthreads) + env['ArtCores'] = str(ncores) -def is_exe(fpath): - """Return True if fpath is executable.""" - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + # Results + full_exit_code = 0 + full_out = '' + full_err = '' + + # Start + env['ArtProcess'] = "start" + (exit_code, out, err, command, start_time_start, end_time_start) = run_command(cmd, dir=dir, shell=shell, env=env, verbose=verbose) + full_exit_code = full_exit_code if exit_code == 0 else exit_code + full_out += "-+-art-process start out " + start_time_start.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + full_out += out + full_out += "---art-process start out " + end_time_start.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + + full_err += "-+-art-process start err " + start_time_start.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + full_err += err + full_err += "---art-process start err " + end_time_start.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + + log.info("Creating executor with cores: %d", ncores) + executor = concurrent.futures.ThreadPoolExecutor(ncores) + future_set = [] + + # Processing + log.info("Running threads: %d", nthreads) + for index in range(nthreads): + process_env = env.copy() + process_env['ArtProcess'] = str(index) + future_set.append(executor.submit(run_command, cmd, dir=dir, shell=shell, env=process_env, verbose=verbose)) + + log.info("Waiting for threads to finish...") + concurrent.futures.wait(future_set) + for index, future in enumerate(future_set): + (exit_code, out, err, command, start_time_process, end_time_process) = future.result() + full_exit_code = full_exit_code if exit_code == 0 else exit_code + full_out += "-+-art-process " + str(index) + " out " + start_time_process.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + full_out += out + full_out += "---art-process " + str(index) + " out " + end_time_process.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + + full_err += "-+-art-process " + str(index) + " err " + start_time_process.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + full_err += err + full_err += "---art-process " + str(index) + " err " + end_time_process.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + + # End + env['ArtProcess'] = "end" + (exit_code, out, err, command, start_time_end, end_time_end) = run_command(cmd, dir=dir, shell=shell, env=env, verbose=verbose) + full_exit_code = full_exit_code if exit_code == 0 else exit_code + full_out += "-+-art-process end out " + start_time_end.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + full_out += out + full_out += "---art-process end out " + end_time_end.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + + full_err += "-+-art-process end err " + start_time_end.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + full_err += err + full_err += "---art-process end err " + end_time_end.strftime('%Y-%m-%dT%H:%M:%S') + "\n" + + end_time = datetime.now() + + return (full_exit_code, full_out, full_err, cmd, start_time, end_time) + + +def is_exe(path): + """Return True if path is executable.""" + return os.path.isfile(path) and os.access(path, os.X_OK) def make_executable(path): @@ -83,13 +178,7 @@ def mkdir_p(path): def which(program): - """TBD.""" - import os - - def is_exe(fpath): - """TBD.""" - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - + """Show which program is actually found on the PATH.""" fpath, fname = os.path.split(program) if fpath: if is_exe(program): diff --git a/Tools/ART/python/ART/art_rucio.py 
b/Tools/ART/python/ART/art_rucio.py new file mode 100755 index 0000000000000000000000000000000000000000..037dae10d3b99f54d0bf22fdf50b932e04eafda8 --- /dev/null +++ b/Tools/ART/python/ART/art_rucio.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python +# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration +"""Class to interact with RUCIO.""" + +__author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>" + +import json +import logging +import os +import re +import tempfile + +try: + import rucio.client + RUCIO = True +except ImportError: + RUCIO = False + +from art_misc import run_command + +MODULE = "art.rucio" + + +class ArtRucio(object): + """Class to interact with RUCIO.""" + + ART_JOB = 'art-job.json' + ATHENA_STDOUT = 'athena_stdout.txt' + JSON = '_EXT0' + + def __init__(self, art_directory, nightly_release, project, platform, nightly_tag): + """Keep arguments.""" + self.art_directory = art_directory + self.nightly_release = nightly_release + self.project = project + self.platform = platform + self.nightly_tag = nightly_tag + + self.table = None + + def exit_if_no_rucio(self): + """Exit if RUCIO is not available.""" + log = logging.getLogger(MODULE) + if not RUCIO: + log.critical("RUCIO not available") + exit(1) + + def get_scope(self, user): + """Return scope.""" + return '.'.join(('user', user)) + + def download(self, did, dst_dir, shell=False): + """Download did into the given dst_dir directory.""" + log = logging.getLogger(MODULE) + self.exit_if_no_rucio() + + # rucio downloads cache properly + log.info("Shell = %s", shell) + env = os.environ.copy() + if shell: + cmd = ' '.join((os.path.join(self.art_directory, 'art-download.sh'), did, dst_dir)) + env['PATH'] = '.:' + env['PATH'] + else: + cmd = ' '.join(('rucio', 'download', '--dir', dst_dir, did)) + + (exit_code, out, err, command, start_time, end_time) = run_command(cmd, env=env) + if (exit_code != 0): + log.error(err) + log.info(out) + return exit_code + + def xrdcp(self, src, dst, force=False, recursive=False, verbose=False): + """Copy using xrdcp.""" + if src is None or dst is None: + return 1 + log = logging.getLogger(MODULE) + cmd = ' '.join(('xrdcp -N', '-f' if force else '', '-r' if recursive else '', '-v' if verbose else '', src, dst)) + log.debug(cmd) + (exit_code, out, err, command, start_time, end_time) = run_command(cmd, verbose=False) + if exit_code != 0: + log.error(err) + # seems to write empty lines + # log.info(out) + return exit_code + + def __parse_outfile(self, outfile): + """Parse outfile and return tuple (sequence_tag, single_index, grid_index) or None.""" + # + # Matching: user.artprod.atlas.master.Athena.x86_64-slc6-gcc62-opt.2018-01-21T2301.284099.MuonRecRTT.6.log.13062437.000001.log.tgz + # user.artprod.atlas.master.Athena.x86_64-slc6-gcc62-opt.2018-01-21T2301.284099.MuonRecRTT.6 + # user.artprod.atlas.master.Athena.x86_64-slc6-gcc62-opt.2018-01-19T2301.283573.TrigAnalysisTest + # + PATTERN = r"user\.([^\.]+)\.([^\.]+)\." + self.nightly_release + "\." + self.project + "\." + self.platform + "\."
+ self.nightly_tag + "\.(.+)" + match = re.search(PATTERN, outfile) + if not match: + return None + + (user, experiment, rest) = match.groups() + + items = rest.split(".") + sequence_tag = items[0] if len(items) > 0 else -1 + try: + single_index = int(items[2]) if len(items) > 2 else -1 + except ValueError: + single_index = -1 + + if single_index < 0: + grid_index = int(items[4]) if len(items) > 4 else -1 + else: + grid_index = int(items[5]) if len(items) > 5 else -1 + + # print outfile, sequence_tag, single_index, grid_index + + return (sequence_tag, single_index, grid_index) + + def get_sequence_tag(self, outfile): + """Return sequence tag or None.""" + result = self.__parse_outfile(outfile) + return result[0] if result is not None else None + + def get_single_index(self, outfile): + """Return single index or -1.""" + result = self.__parse_outfile(outfile) + return result[1] if result is not None else -1 + + def get_grid_index(self, outfile): + """Return grid index or -1.""" + result = self.__parse_outfile(outfile) + return result[2] if result is not None else -1 + + def get_outfile_name(self, user, package, sequence_tag, test_name=None, nightly_tag=None): + """Create outfile name based on parameters.""" + nightly_tag = self.nightly_tag if nightly_tag is None else nightly_tag + outfile = '.'.join(('user', user, 'atlas', self.nightly_release, self.project, self.platform, nightly_tag, sequence_tag, package)) + return outfile if test_name is None else '.'.join((outfile, test_name)) + + def get_outfiles(self, user, package, nightly_tag=None): + """ + Create list of outfiles from parameters. + + example: ['user.artprod.atlas.master.Athena.x86_64-slc6-gcc62-opt.2018-01-21T2301.284099.MuonRecRTT.3'] + """ + log = logging.getLogger(MODULE) + nightly_tag = self.nightly_tag if nightly_tag is None else nightly_tag + + self.exit_if_no_rucio() + rucio_client = rucio.client.Client() + + result = [] + + # look for "batch" outfile, and take latest (by sequence tag) + pattern = self.get_outfile_name(user, package, '*', None, nightly_tag) + outfile = None + sequence = None + for out in rucio_client.list_dids(self.get_scope(user), {'name': '.'.join((pattern, 'log'))}): + sequence_tag = self.get_sequence_tag(out) + if sequence is None or sequence_tag > sequence: + outfile = os.path.splitext(out)[0] + sequence = sequence_tag + + if outfile is not None: + log.debug("Adding 'batch': %s", outfile) + result.append(outfile) + + # look for "single" outfile, deduce sequence_tag + pattern = self.get_outfile_name(user, package, '*', '*', nightly_tag) + log.debug("Trying pattern %s", pattern) + outfile = None + sequence = None + for out in rucio_client.list_dids(self.get_scope(user), {'name': '.'.join((pattern, 'log'))}): + sequence_tag = self.get_sequence_tag(out) + if sequence is None or sequence_tag > sequence: + outfile = os.path.splitext(out)[0] + sequence = sequence_tag + + if outfile is not None: + log.debug("Found %s", outfile) + sequence_tag = self.get_sequence_tag(outfile) + if sequence_tag is not None: + # found sequence_tag, find all 'single' outfiles + pattern = self.get_outfile_name(user, package, sequence_tag, '*', nightly_tag) + for out in rucio_client.list_dids(self.get_scope(user), {'name': '.'.join((pattern, 'log'))}): + outfile = os.path.splitext(out)[0] + log.debug("Adding 'single': %s", outfile) + result.append(outfile) + + return result + + def get_table(self, user, package, nightly_tag=None, shell=False): + """Get full table with grid_index, single_index and test_name for particular package and
nightly_tag.""" + log = logging.getLogger(MODULE) + + if self.table is not None: + return self.table + + self.exit_if_no_rucio() + + table = [] + + nightly_tag = self.nightly_tag if nightly_tag is None else nightly_tag + + outfiles = self.get_outfiles(user, package, nightly_tag) + + outfiles_str = [x + ArtRucio.JSON for x in outfiles] + outfiles_str = ' '.join(outfiles_str) + + tmp_dir = tempfile.gettempdir() + dst_dir = tmp_dir + + log.info("Shell = %s", shell) + exit_code = self.download(outfiles_str, dst_dir, shell) + if exit_code != 0: + log.error("Failed to execute rucio download %d", exit_code) + return table + + for outfile in outfiles: + single_index = self.get_single_index(outfile) + + json_directory = os.path.join(dst_dir, outfile + ArtRucio.JSON) + if not os.path.isdir(json_directory): + # print single_index, rucio_name + table.append({ + 'single_index': single_index, + 'grid_index': -1, + 'file_index': -1, + 'job_index': -1, + 'outfile': outfile, + 'job_name': None + }) + continue + + for json_file in os.listdir(json_directory): + json_path = os.path.join(json_directory, json_file) + if os.path.isfile(json_path): + with open(json_path) as json_fd: + info = json.load(json_fd) + job_name = os.path.splitext(info['name'])[0] + + # Match: user.artprod.13199077.EXT0._000002.art-job.json + # Match: user.artprod.13199077.EXT0._000003.art-job.json.4 + # job_index = 13199077, grid_index = 3, file_index = 4 + match = re.search(r"user\.([^\.]+)\.(\d+)\.EXT0\._(\d+)\.art-job.json(?:\.(\d+))?", json_file) + if match: + job_index = int(match.group(2)) + grid_index = int(match.group(3)) + file_index = -1 if match.group(4) is None else int(match.group(4)) + else: + job_index = -1 + grid_index = -1 + file_index = -1 + + table.append({ + 'single_index': single_index, + 'grid_index': grid_index, + 'file_index': file_index, + 'job_index': job_index, + 'outfile': outfile, + 'job_name': job_name + }) + + self.table = table + return table diff --git a/Tools/ART/python/ART/parallelScheduler.py b/Tools/ART/python/ART/parallelScheduler.py deleted file mode 100644 index 42a1d1cfc42a5f7f7a4e1a2c8cea3c05d36293c7..0000000000000000000000000000000000000000 --- a/Tools/ART/python/ART/parallelScheduler.py +++ /dev/null @@ -1,215 +0,0 @@ -''' -Created on 16/05/2012 - - * Repository : https://github.com/victor-gil-sepulveda/pyScheduler - * Licensed under the MIT license (see LICENSE-MIT) - * Copyright (C) 2013 Victor Alejandro Gil Sepulveda - -@author: victor -''' -import multiprocessing -from serialScheduler import SerialScheduler -import sys - -def printnflush(*args): - """ - Prints and flushes the things passes as arguments. - @param args: The data we want to print. - """ - if False: - print args - sys.stdout.flush() - -def run_task(process_name, tasks, pipe_end): - """ - Helper function to run tasks inside a process. It implements an infinite loop controlled by the messages - received from 'pipe_end'. - Messages from the pipe are (message_type, value) tuples. Thsi is the currently implemented protocol: - - "EXECUTE": Runs the task with id == value. - -> Sends a "TASK FINISHED" message with value = (task_id, task_result) - - "FINISH": Ends the loop so that process can end and free its resources. - @param process_name: Unique id of the process executing this function. - @param tasks: The dictionary of all tasks (we want to execute in this scheduling) indexed by their id . - @param pipe_end: A process pipe used to send/receive messages from/to the master. 
- """ - task_ended = False - try: - while not task_ended: - # Blocks until it receives a message - message_type, value = pipe_end.recv() - - if message_type == "EXECUTE": - result = tasks[value].run() - pipe_end.send(("TASK FINISHED", (value, result))) - - elif message_type == "FINISH": - printnflush( "Communication successfully closed for",process_name) - task_ended = True - else: - printnflush("Unexpected message: %s"%message_type) - task_ended = True - - except EOFError: - printnflush("Communication closed due to remote closing of the pipe in process %s"%process_name) - - except Exception, msg: - printnflush("Communication closed due to unexpected exception: %s"%msg) - - pipe_end.close() - printnflush( "Task reached end") - -class TaskRunner(object): - """ - Helper class that encapsulates a process used to execute a subset of the tasks list. - """ - def __init__(self, process_name, target_function, tasks): - """ - Creates the process that will be in charge of executing the tasks and a pipe to communicate - with the main process. - @param process_name: Unique id for this task executor. - @param target_function: Is the function the process will execute. In the case of ProcessParallelScheduler - the function used is 'run_task', however it can use any function that receives the same parameters that - 'run_task' needs. - @param tasks: The dictionary of all tasks. - """ - self.pipe_start, self.pipe_end = multiprocessing.Pipe() - printnflush ("Process started: %s"%process_name) - self.process = multiprocessing.Process(group=None, - target=target_function, - name=process_name, - args = (process_name, tasks, self.pipe_end)) - self.busy = False - - def run(self): - """ - Starts the inner process (and therefore the defined function that is going to be used to control the - messages). - """ - self.process.start() - - def execute_task(self, task_name): - """ - Sends the process an "EXECUTE" task message to run the task named 'task_name'. - @param task_name: Name of the task to be executed. - """ - self.busy = True - self.pipe_start.send(("EXECUTE",task_name)) - - def set_task_finished(self): - """ - Sets the 'busy' flag in order to mark this task executor as busy (its associated process is - performing a task) - """ - self.busy = False - - def finalize(self): - """ - Sends a finalization message (forces the associated process to break the loop and end)- - """ - self.busy = False - self.pipe_start.send(("FINISH",None)) - self.process.join() - if self.process.is_alive(): - self.process.terminate() - - def has_an_incomming_message(self): - """ - True if this task runner has received a message from its associated process. - """ - return self.pipe_start.poll(1) - - def get_message(self): - """ - Returns the message the associated process sent (using the 'run_task' function it can only be a - "TASK FINISHED" message) - """ - return self.pipe_start.recv() - -class ParallelScheduler(SerialScheduler): - """ - Scheduler type that works by creating a limited number of processes and distributing the tasks between them. - """ - - def __init__(self, max_processes, functions = {}): - """ - Creates the scheduler. - @param max_processes: Indeed is the total number of processes that will be used for the scheduling parallelization - plus one (which is representing the current process). 
- @param functions: @see SerialScheduler - """ - SerialScheduler.__init__(self,functions) - self.number_of_processes = max_processes - 1 - self.running = [] - - def run(self): - """ - Like in the SerialScheduler, this function tries to run all the tasks, checking their dependencies. In this case - some processes will be spawned so that they can share the work of executing the tasks. - This run function acts as the real scheduler, telling the 'task executor' objects which task to run. This kind - of dynamic scheduling fosters an efficient use of the resources (every time a 'task executor' ends a task, it is - told to run another one, so that load is balanced). - This is a simple implementation of a master-slave pattern (where slaves are the task runners). - """ - self.function_exec('scheduling_started', {"number_of_tasks":len(self.not_completed)}) - - # Create processes - available_workers = self.number_of_processes - task_runners = [] - for i in range(available_workers): - process_name = "TaskExecutor"+str(i) - runner = TaskRunner(process_name, run_task, self.tasks) - runner.run() - task_runners.append(runner) - - # Execute all tasks - while not len(self.finished) == len(self.tasks): - cannot_choose_a_task = False - - # Choose an available process - task_name = self.choose_runnable_task() - - # Try to execute it - if task_name is not None: - # If we can still execute a task we find a free task runner to do it - for task_runner in task_runners: - if not task_runner.busy: - self.function_exec('task_started', {"task_name":task_name}) - task_runner.execute_task(task_name) - self.lock_task(task_name) # Ensure that it can't be selected again until task is finished - self.running.append(task_name) - break - else: - cannot_choose_a_task = True - - if cannot_choose_a_task or len(self.running) == available_workers: - # If there is not an available task (so all remaining tasks have dependencies) or - # we do not have any available worker, it's time to block until we receive results. - - # We start polling busy runners pipes to wait for a result and add this result to the - # results list - task_finished = False - while not task_finished: - for task_runner in task_runners: - if task_runner.busy and task_runner.has_an_incomming_message(): - message, value = task_runner.get_message() - if message == "TASK FINISHED": - task_name, result = value - self.function_exec('task_ended', {"task_name":task_name, "finished":len(self.finished)}) - self.running.remove(task_name) - self.complete_task(task_name) - self.remove_from_dependencies(task_name) - task_runner.set_task_finished() - self.results.append(result) - else: - printnflush ( "Unexpected message: %s"%message) - exit() - task_finished = True - - printnflush ("Sending processes termination message.") - - for task_runner in task_runners: - task_runner.finalize() - - self.function_exec('scheduling_ended') - - return self.results diff --git a/Tools/ART/python/ART/serialScheduler.py b/Tools/ART/python/ART/serialScheduler.py deleted file mode 100644 index 9272f8eff6f325652d262d0f60d81795369e581a..0000000000000000000000000000000000000000 --- a/Tools/ART/python/ART/serialScheduler.py +++ /dev/null @@ -1,177 +0,0 @@ -''' -Created on 16/08/2012 - - * Repository : https://github.com/victor-gil-sepulveda/pyScheduler - * Licensed under the MIT license (see LICENSE-MIT) - * Copyright (C) 2013 Victor Alejandro Gil Sepulveda - -@author: victor -''' -class Task(object): - """ - Representation of a task. 
- """ - def __init__(self, function, name, kwargs, description = ""): - """ - Creates a Task object. - @param function: A callable object that will perform the real work of the task. - @param name: The name of the task (an identifier). - @param kkwargs: Parameters for the callable. - @param description: A short description of what the task does. Can be empty. - """ - self.function = function - self.name = name - self.kwargs = kwargs - self.result = None - - def run(self): - """ - Runs the task's associated callable and returns its result. - @return: The result of the callable execution. - """ - self.result = self.function(**(self.kwargs)) - return self.result - -class SerialScheduler(object): - """ - Base scheduling class. It ensures that no task is executed before its dependencies (without building a - dependency tree). - It allows to define some functions that will be executed when the scheduler reaches some strategic points. - TODO: In all scheduler types a dependencies must be checked to avoid cycles for instance. - """ - - def __init__(self, functions = {}): - """ - Constructor. Initializes needed variables. - - @param fucntions: A dictionary containing 3 possible keys. Each key defines another dictionary of two - entries ('function' and 'kwargs') with a callable and its arguments. The possible keys are: - 'task_started' -> Were an action performed after each task is called is defined. - 'task_ended' -> Defines the action performed when a task is finished. - 'scheduling_started' -> Defines the action performed when the scheduler starts to run tasks. - 'scheduling_ended' -> Defines the action performed when the scheduler has finished to run all tasks. - """ - self.functions = functions - self.tasks = {} - self.dependencies = {} - self.not_completed = [] - self.finished = [] - self.results = [] - - def function_exec(self, function_type, info = None): - """ - Execute one of the predefined functions if defined. - - @param function_type: Type of the function to check and run (proper types should be 'task_start','task_end' - and 'scheduling_end', each defining 'function' and 'kwargs' entries. - - """ - if function_type in self.functions: - self.functions[function_type]['kwargs']['info'] = info - self.functions[function_type]['function'](**(self.functions[function_type]['kwargs'])) - - def run(self): - """ - Runs all the tasks in a way that tasks are not executed before their dependencies are - cleared. - - @return: An array with the results of task calculations. - """ - self.function_exec('scheduling_started', {"number_of_tasks":len(self.not_completed)}) - - ordered_tasks = self.get_ordered_tasks() - - for task in ordered_tasks: - self.function_exec('task_started', {"task_name":task.name}) - self.results.append(task.run()) - self.function_exec('task_ended', {"task_name":task.name, "finished":len(self.finished)}) - - self.function_exec('scheduling_ended') - - return self.results - - def get_ordered_tasks(self): - """ - Returns a list of task names so that any task name will have an index bigger than the tasks it depends on. - - @return: A list of task names. - """ - ordered_tasks = [] - while len( self.not_completed) > 0: - #Choose an available process - task_name = self.choose_runnable_task() - - if task_name is None: - print "It was impossible to pick a suitable task for running. Check dependencies." 
- return [] - else: - # Run a process - ordered_tasks.append(self.tasks[task_name]) - self.lock_task(task_name) - self.complete_task(task_name) - self.remove_from_dependencies(task_name) - return ordered_tasks - - def choose_runnable_task(self): - """ - Returns a task name which dependencies have already been fulfilled. - - @return: The task name. - """ - for task_name in self.not_completed: - if len(self.dependencies[task_name]) == 0: # This process has no dependencies - return task_name; - return None # All task have dependencies (circular dependencies for instance) - - - def lock_task(self, task_name): - """ - Removes a task from the 'not complete list' making it unavailable for further selections. - - @param task_name: The name of the task to lock. - """ - # Remove it from the not_completed list - self.not_completed.remove(task_name) - - def complete_task(self, task_name): - """ - Adds a task to the list of completed tasks. - - @param task_name: The name of the task to complete. - """ - self.finished.append(task_name) - - def remove_from_dependencies(self, task_name): - """ - Removes a task from the dependencies of all other uncomplete tasks. At the end of execution, all dependency - lists must be empty. - - @param task_name: The name of the task to remove from dependencies. - """ - for tn in self.dependencies: - if task_name in self.dependencies[tn]: - self.dependencies[tn].remove(task_name) - - def add_task(self, task_name, dependencies, target_function, function_kwargs, description): - """ - Adds a task to the scheduler. The task will be executed along with the other tasks when the 'run' function is called. - - @param task_name: - @param dependencies: A list with the task_names of the tasks that must be fulfilled before executing this other task. - Example of dependencies dictionary: - {"task_C":["dep_task_A", "dep_task_B"]} - This dependencies dict. means that task C cannot be run until task B and A are cleared. - @param target_function: The function executed by this task. - @param function_kwargs: Its arguments. - @param description: A brief description of the task. - """ - - if not task_name in self.tasks: - task = Task( name = task_name, description = description, function = target_function, kwargs=function_kwargs) - task.description = description - self.tasks[task_name] = task - self.not_completed.append(task_name) - self.dependencies[task_name] = dependencies - else: - print "[Error SerialScheduler::add_task] Task %s already exists. Task name must be unique."%task_name - exit() diff --git a/Tools/ART/scripts/art-diff.py b/Tools/ART/scripts/art-diff.py index 8e56071c97483aafef3ea2fd3356d90169aad3fc..8a3b9340934a28e81ec7b40c754f377d20e45851 100755 --- a/Tools/ART/scripts/art-diff.py +++ b/Tools/ART/scripts/art-diff.py @@ -1,30 +1,34 @@ #!/usr/bin/env python -# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration """ ART - ATLAS Release Tester - Diff. Usage: - art-diff.py [--diff-type=<diff_type> --exclude=<pattern>... --platform=<platform> --platform-ref=<platform>] <nightly_release> <project> <nightly_tag> <nightly_release_ref> <platform_ref> <nightly_tag_ref> <package> - art-diff.py [--diff-type=<diff_type> --exclude=<pattern>...] <dir> <ref_dir> + art-diff.py [--diff-type=<diff_type> --exclude=<pattern>... --platform-ref=<platform> --entries=<entries>] <nightly_release_ref> <project_ref> <nightly_tag_ref> <package> + art-diff.py [--diff-type=<diff_type> --exclude=<pattern>... 
--entries=<entries>] <path> <ref_path> Options: --diff-type=<diff_type> Type of diff (e.g. diff-pool or diff-root) [default: diff-pool] + --entries=<entries> Only diff over number of entries [default: -1] --exclude=<pattern>... Exclude test files according to pattern -h --help Show this screen - --platform=<platform> Platform [default: x86_64-slc6-gcc62-opt] --platform-ref=<platform> Reference Platform [default: x86_64-slc6-gcc62-opt] + --test-name=<test_name> Test name to compare --version Show version Arguments: - dir Directory to compare - nightly_release Name of the nightly release (e.g. 21.0) - nightly_release_ref Reference Name of the nightly release (e.g. 21.0) - nightly_tag Nightly tag (e.g. 2017-02-26T2119) - nightly_tag_ref Reference Nightly tag (e.g. 2017-02-26T2119) - package Package of the test (e.g. Tier0ChainTests) - project Name of the project (e.g. Athena) - project_ref Reference Name of the project (e.g. Athena) - ref_dir Directory to compare to + path Directory or File to compare + nightly_release_ref Reference Name of the nightly release (e.g. 21.0) + nightly_tag_ref Reference Nightly tag (e.g. 2017-02-26T2119) + package Package of the test (e.g. Tier0ChainTests) + project_ref Reference Name of the project (e.g. Athena) + ref_path Directory or File to compare to + +Environment: + AtlasBuildBranch Name of the nightly release (e.g. 21.0) + AtlasProject Name of the project (e.g. Athena) + <AtlasProject>_PLATFORM Platform (e.g. x86_64-slc6-gcc62-opt) + AtlasBuildStamp Nightly tag (e.g. 2017-02-26T2119) """ __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>" @@ -39,8 +43,9 @@ import sys from ART.docopt import docopt -VERSION = "0.6.7" +VERSION = "0.7.8" ATHENA_STDOUT = "athena_stdout.txt" +DEFAULT_ENTRIES = -1 class ArtDiff(object): @@ -48,43 +53,71 @@ class ArtDiff(object): EOS_OUTPUT_DIR = '/eos/atlas/atlascerngroupdisk/data-art/grid-output' - def __init__(self, arguments): + def __init__(self): """Constructor of ArtDiff.""" + + def parse(self, arguments): + """Called from command line.""" diff_type = arguments['--diff-type'] + entries = arguments['--entries'] excludes = arguments['--exclude'] - if arguments['<dir>'] is None: - nightly_release = arguments['<nightly_release>'] - project = arguments['<project>'] - platform = arguments['--platform'] - nightly_tag = arguments['<nightly_tag>'] + if arguments['<nightly_release_ref>'] is not None: + try: + nightly_release = os.environ['AtlasBuildBranch'] + project = os.environ['AtlasProject'] + platform = os.environ[project + '_PLATFORM'] + nightly_tag = os.environ['AtlasBuildStamp'] + return (nightly_release, project, platform, nightly_tag) + except KeyError, e: + print "Environment variable not set", e + sys.exit(1) nightly_release_ref = arguments['<nightly_release_ref>'] project_ref = arguments['<project_ref>'] - platform_ref = arguments['--platform_ref'] + platform_ref = arguments['--platform-ref'] nightly_tag_ref = arguments['<nightly_tag_ref>'] package = arguments['<package>'] - - exit(self.diff(nightly_release, project, platform, nightly_tag, nightly_release_ref, project_ref, platform_ref, nightly_tag_ref, package, diff_type, excludes)) + print nightly_release, project, platform, nightly_tag, nightly_release_ref, project_ref, platform_ref, nightly_tag_ref + exit(self.diff(nightly_release, project, platform, nightly_tag, nightly_release_ref, project_ref, platform_ref, nightly_tag_ref, package, diff_type, excludes, entries=entries)) # directory compare - directory = arguments['<dir>'] - ref_dir = arguments['<ref_dir>']
-        exit(self.diff_dirs(directory, ref_dir, diff_type, excludes))
+        path = arguments['<path>']
+        ref_path = arguments['<ref_path>']
+
+        if os.path.isfile(path):
+            # file compare
+            if not os.path.isfile(ref_path):
+                print "Error: <ref_path> should be a file if <path> is a file."
+                sys.exit(1)
+
+            exit(self.diff_file(path, ref_path, diff_type, entries=entries))
+
+        if os.path.isfile(ref_path):
+            print "Error: <ref_path> should be a directory if <path> is a directory."
+            sys.exit(1)

-    def diff(self, nightly_release, project, platform, nightly_tag, nightly_release_ref, project_ref, platform_ref, nightly_tag_ref, package, diff_type, excludes=[]):
+        # check if path contains "test_" entries
+        if len(glob.glob(os.path.join(path, 'test_*'))) > 0:
+            # directory compare
+            exit(self.diff_dirs(path, ref_path, diff_type, excludes, entries=entries))
+
+        # single test compare
+        exit(self.diff_test(path, ref_path, diff_type, entries=entries))
+
+    def diff(self, nightly_release, project, platform, nightly_tag, nightly_release_ref, project_ref, platform_ref, nightly_tag_ref, package, diff_type, excludes=[], entries=DEFAULT_ENTRIES):
         """Run difference between two results."""
-        val_dir = os.path.join(ArtDiff.EOS_OUTPUT_DIR, nightly_release, nightly_tag, project, platform, package)
-        ref_dir = os.path.join(ArtDiff.EOS_OUTPUT_DIR, nightly_release_ref, nightly_tag_ref, project_ref, platform_ref, package)
-        return self.diff_dirs(val_dir, ref_dir, diff_type, excludes)
+        path = os.path.join(ArtDiff.EOS_OUTPUT_DIR, nightly_release, project, platform, nightly_tag, package)
+        ref_path = os.path.join(ArtDiff.EOS_OUTPUT_DIR, nightly_release_ref, project_ref, platform_ref, nightly_tag_ref, package)
+        return self.diff_dirs(path, ref_path, diff_type, excludes, entries=entries)

-    def diff_dirs(self, val_dir, ref_dir, diff_type, excludes=[]):
+    def diff_dirs(self, path, ref_path, diff_type, excludes=[], entries=DEFAULT_ENTRIES):
         """Run difference between two directories."""
-        print "val_dir: %s" % val_dir
-        print "ref_dir: %s" % ref_dir
+        print "    path: %s" % path
+        print "ref_path: %s" % ref_path

         stat_per_chain = {}
-        for test_name in os.listdir(val_dir):
+        for test_name in os.listdir(path):
             # skip tests in pattern
             exclude_test = False
             for exclude in excludes:
@@ -98,55 +131,59 @@ class ArtDiff(object):
             print "******************************************"
             print "Test: %s" % test_name
             print "******************************************"
-
-            val_result = self.get_result(os.path.join(val_dir, test_name))
-            ref_result = self.get_result(os.path.join(val_dir, test_name))
-            for key, value in val_result.iteritems():
-                if key in ref_result:
-                    print "%-10s: ref: %d events, val: %d events" % (key, int(ref_result[key][1]), int(val_result[key][1]))
-
-            test_dir = os.path.join(val_dir, test_name)
-            test_patterns = ['*AOD*.pool.root', '*ESD*.pool.root', '*HITS*.pool.root', '*RDO*.pool.root', '*TAG*.root']
-            test_files = []
-            for test_pattern in test_patterns:
-                test_files.extend(glob.glob(os.path.join(test_dir, test_pattern)))
-            for test_file in test_files:
-                extension = '.root'
-                name = os.path.splitext(os.path.basename(test_file))[0]  # remove .root
-                if name.endswith('.pool'):
-                    extension = '.pool.root'
-                    name = os.path.splitext(os.path.basename(name))[0]  # remove .pool
-                val_file = os.path.join(val_dir, test_name, name + extension)
-                ref_file = os.path.join(ref_dir, test_name, name + extension)
-                print "val_file: %s" % val_file
-                print "ref_file: %s" % ref_file
-
-                if not os.path.exists(ref_file):
-                    print "no test found in ref_dir to compare: %s" % ref_file
-                    continue
-
-                # add the test to the summary if it was not already there
-                if test_name not in stat_per_chain:
-                    stat_per_chain[test_name] = 0
-
-                if extension == '.pool.root':
-                    if diff_type == 'diff-pool':
-                        stat_per_chain[test_name] |= self.diff_pool(val_file, ref_file)
-                    else:
-                        stat_per_chain[test_name] |= self.diff_root(val_file, ref_file)
-                else:
-                    stat_per_chain[test_name] |= self.diff_tag(val_file, ref_file)
+            stat_per_chain[test_name] = self.diff_test(os.path.join(path, test_name), os.path.join(ref_path, test_name), diff_type, entries=entries)

         result = 0
-        for filename, status in stat_per_chain.iteritems():
+        for test_name, status in stat_per_chain.iteritems():
             if status:
-                print "%-70s CHANGED" % filename
+                print "%-70s CHANGED" % test_name
                 result = 1
             else:
-                print "%-70s IDENTICAL" % filename
+                print "%-70s IDENTICAL" % test_name
         return result

+    def diff_test(self, path, ref_path, diff_type, entries=DEFAULT_ENTRIES):
+        """Run difference between the directories of a single test."""
+        result = self.get_result(path)
+        ref_result = self.get_result(ref_path)
+        for key, value in result.iteritems():
+            if key in ref_result:
+                print "%-10s: ref: %d events, val: %d events" % (key, int(ref_result[key][1]), int(result[key][1]))
+
+        test_dir = path
+        test_patterns = ['*AOD*.pool.root', '*ESD*.pool.root', '*HITS*.pool.root', '*RDO*.pool.root', '*TAG*.root']
+        # get files in all patterns
+        test_files = []
+        for test_pattern in test_patterns:
+            test_files.extend(glob.glob(os.path.join(test_dir, test_pattern)))
+        # run test over all files
+        result = 0
+        for test_file in test_files:
+            basename = os.path.basename(test_file)
+            val_file = os.path.join(path, basename)
+            ref_file = os.path.join(ref_path, basename)
+            print "val_file: %s" % val_file
+            print "ref_file: %s" % ref_file
+
+            result |= self.diff_file(val_file, ref_file, diff_type, entries=entries)
+
+        return result
+
+    def diff_file(self, path, ref_path, diff_type, entries=DEFAULT_ENTRIES):
+        """Compare two files."""
+        if not os.path.exists(ref_path):
+            print "no file found in ref_path to compare: %s" % ref_path
+            return 0
+
+        if fnmatch.fnmatch(path, '*TAG*.root'):
+            return self.diff_tag(path, ref_path)
+
+        if diff_type == 'diff-pool':
+            return self.diff_pool(path, ref_path)
+
+        return self.diff_root(path, ref_path, entries)
+
     def get_result(self, directory):
         """
         Return map [ESD|AOD,...] -> (success, succeeded event count).
@@ -189,7 +226,7 @@ class ArtDiff(object):

         return stat

-    def diff_root(self, file_name, ref_file, entries=-1):
+    def diff_root(self, file_name, ref_file, entries):
         """TBD."""
         # diff-root
         (code, out, err) = self.run_command("acmd.py diff-root " + file_name + " " + ref_file + " --error-mode resilient --ignore-leaves RecoTimingObj_p1_HITStoRDO_timings RecoTimingObj_p1_RAWtoESD_mems RecoTimingObj_p1_RAWtoESD_timings RAWtoESD_mems RAWtoESD_timings ESDtoAOD_mems ESDtoAOD_timings HITStoRDO_timings RAWtoALL_mems RAWtoALL_timings RecoTimingObj_p1_RAWtoALL_mems RecoTimingObj_p1_RAWtoALL_timings RecoTimingObj_p1_EVNTtoHITS_timings --entries " + str(entries))
@@ -207,7 +244,7 @@ class ArtDiff(object):

         The command runs as separate subprocesses for every piped command.
         Returns tuple of exit_code, output and err.
         """
-        print "Execute: %s" % cmd
+        print "Execute:", cmd
         if "|" in cmd:
             cmd_parts = cmd.split('|')
         else:
@@ -234,4 +271,4 @@ if __name__ == '__main__':
         exit(1)

     arguments = docopt(__doc__, version=os.path.splitext(os.path.basename(__file__))[0] + ' ' + VERSION)
-    ArtDiff(arguments)
+    ArtDiff().parse(arguments)
""" - print "Execute: %s" % cmd + print "Execute:", cmd if "|" in cmd: cmd_parts = cmd.split('|') else: @@ -234,4 +271,4 @@ if __name__ == '__main__': exit(1) arguments = docopt(__doc__, version=os.path.splitext(os.path.basename(__file__))[0] + ' ' + VERSION) - ArtDiff(arguments) + ArtDiff().parse(arguments) diff --git a/Tools/ART/scripts/art-download.sh b/Tools/ART/scripts/art-download.sh new file mode 100755 index 0000000000000000000000000000000000000000..b852dc8d75342e6ce4cbfd2b652a220f1c5d2c24 --- /dev/null +++ b/Tools/ART/scripts/art-download.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration +# +# NOTE do NOT run with /bin/bash -x as the output is too big for gitlab-ci +# arguments: INPUTNAME +# +# author : Tulay Cuhadar Donszelmann <tcuhadar@cern.ch> +# +# example: art-download NAME DIRECTORY + +if [ $# -ne 2 ]; then + echo 'Usage: art-get-input.sh NAME DIRECTORY' + exit 1 +fi + +NAME=$1 +shift +DIRECTORY=$1 +shift + +export ATLAS_LOCAL_ROOT_BASE="${ATLAS_LOCAL_ROOT_BASE:-/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase}" +# shellcheck source=/dev/null +source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh --quiet + +unset ALRB_noGridMW + +lsetup -f rucio + +echo "Name: ${NAME}" +echo "Directory: ${DIRECTORY}" + +# Do not use: rucio delivers warnings as exit code 127 +#set -e + +rucio download --dir "${DIRECTORY}" "${NAME}" diff --git a/Tools/ART/scripts/art-internal.py b/Tools/ART/scripts/art-internal.py index 8c700482a7b965bf8a674ee24b0fab2bcd65c025..7eb30f6791318a0397ab4512a401bbbd9c82114d 100755 --- a/Tools/ART/scripts/art-internal.py +++ b/Tools/ART/scripts/art-internal.py @@ -1,37 +1,37 @@ #!/usr/bin/env python -# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration """ ART-internal - ATLAS Release Tester (internal command). Usage: - art-internal.py job build [-v -q] <script_directory> <package> <job_type> <sequence_tag> <index> <out> <nightly_release> <project> <platform> <nightly_tag> - art-internal.py job grid [-v -q --skip-setup] <script_directory> <package> <job_type> <sequence_tag> <index_type> <index_or_name> <out> <nightly_release> <project> <platform> <nightly_tag> + art-internal.py build job [-v -q] <script_directory> <sequence_tag> <package> <outfile> <job_type> <job_index> + art-internal.py grid batch [-v -q --skip-setup -n] <script_directory> <sequence_tag> <package> <outfile> <job_type> <job_index> + art-internal.py grid single [-v -q --skip-setup --in=<in_file> -n] <script_directory> <sequence_tag> <package> <outfile> <job_name> Options: - --skip-setup Do not run atlas setup or voms -h --help Show this screen. + --skip-setup Do not run atlas setup or voms + --in=<in_file> Normally percentage IN + -n --no-action No real submit will be done -q --quiet Show less information, only warnings and errors -v --verbose Show more information, debug level --version Show version. -Sub-commands: - job Run a single job, given a particular index - copy Copy outputs to eos area - Arguments: - index_type Type of index used (e.g. batch or single) - index Index of the test inside the package - index_or_name Index of the test (batch), or its name (single) - nightly_release Name of the nightly release (e.g. 21.0) - nightly_tag Nightly tag (e.g. 
2017-02-26T2119) - out Tar filename used for the output of the job + job_index Index of the test inside the package + job_name Index of the test (batch), or its name (single) + job_type Type of job (e.g. grid, ci, build) + outfile Tar filename used for the output of the job package Package of the test (e.g. Tier0ChainTests) - platform Platform (e.g. x86_64-slc6-gcc62-opt) - project Name of the project (e.g. Athena) script_directory Directory containing the package(s) with tests sequence_tag Sequence tag (e.g. 0 or PIPELINE_ID) submit_directory Temporary directory with all files for submission - job_type Type of job (e.g. grid, ci, build) + +Environment: + AtlasBuildBranch Name of the nightly release (e.g. 21.0) + AtlasProject Name of the project (e.g. Athena) + <AtlasProject>_PLATFORM Platform (e.g. x86_64-slc6-gcc62-opt) + AtlasBuildStamp Nightly tag (e.g. 2017-02-26T2119) """ __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>" @@ -44,34 +44,51 @@ from ART.docopt_dispatch import dispatch from ART import ArtGrid, ArtBuild -from ART.art_misc import set_log +from ART.art_misc import get_atlas_env, set_log MODULE = "art.internal" -@dispatch.on('job', 'build') -def job_build(script_directory, package, job_type, sequence_tag, index, out, nightly_release, project, platform, nightly_tag, **kwargs): - """Run a single job, given a particular index. +@dispatch.on('build', 'job') +def build_job(script_directory, sequence_tag, package, outfile, job_type, job_index, **kwargs): + """Build a single job, given a particular index. Tests are called with the following parameters: SCRIPT_DIRECTORY, PACKAGE, TYPE, TEST_NAME """ set_log(kwargs) art_directory = os.path.dirname(os.path.realpath(sys.argv[0])) - exit(ArtBuild(art_directory, nightly_release, project, platform, nightly_tag, script_directory).job(package, job_type, sequence_tag, index, out)) + (nightly_release, project, platform, nightly_tag) = get_atlas_env() + exit(ArtBuild(art_directory, nightly_release, project, platform, nightly_tag, script_directory).job(sequence_tag, package, outfile, job_type, job_index)) + + +@dispatch.on('grid', 'batch') +def grid_batch(script_directory, sequence_tag, package, outfile, job_type, job_index, **kwargs): + """Run a batch job, given a particular index. + + Tests are called with the following parameters: + SCRIPT_DIRECTORY, PACKAGE, TYPE, TEST_NAME, STAGE + """ + set_log(kwargs) + art_directory = os.path.dirname(os.path.realpath(sys.argv[0])) + (nightly_release, project, platform, nightly_tag) = get_atlas_env() + skip_setup = kwargs['skip_setup'] + exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag, script_directory, skip_setup).batch(sequence_tag, package, outfile, job_type, job_index)) -@dispatch.on('job', 'grid') -def job_grid(script_directory, package, job_type, sequence_tag, index_type, index_or_name, out, nightly_release, project, platform, nightly_tag, **kwargs): - """Run a single job, given a particular index. +@dispatch.on('grid', 'single') +def grid_single(script_directory, sequence_tag, package, outfile, job_name, **kwargs): + """Run a single job, given a particular name. 
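`get_atlas_env` is imported from `ART.art_misc` throughout this patch but defined outside the excerpt. Judging from the per-command environment lookups it replaces in `art.py` below, and from the Environment sections of the docstrings, a plausible minimal sketch (an assumption, not the committed implementation) is:

    import os
    import sys

    def get_atlas_env():
        """Return (nightly_release, project, platform, nightly_tag) from the release setup."""
        try:
            nightly_release = os.environ['AtlasBuildBranch']
            project = os.environ['AtlasProject']
            platform = os.environ[project + '_PLATFORM']
            nightly_tag = os.environ['AtlasBuildStamp']
            return (nightly_release, project, platform, nightly_tag)
        except KeyError, e:  # Python 2 syntax, matching the surrounding code
            print "Environment variable not set", e
            sys.exit(1)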
diff --git a/Tools/ART/scripts/art-share.py b/Tools/ART/scripts/art-share.py
index 9d776fc777d0fb5eb0f2b99d0f80552fb527b070..46732ebc00f8a2393022bca5ae4e41c02cf5a32f 100755
--- a/Tools/ART/scripts/art-share.py
+++ b/Tools/ART/scripts/art-share.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
 """
 ART - ATLAS Release Tester - Share.

diff --git a/Tools/ART/scripts/art-task-build.sh b/Tools/ART/scripts/art-task-build.sh
index cb0ac02ce9e06532a9f90b8f175e04a14c06b438..80a4a7d08e8ff534a0df7e82edb1c99bac1393ec 100755
--- a/Tools/ART/scripts/art-task-build.sh
+++ b/Tools/ART/scripts/art-task-build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-# arguments: RELEASE_BASE, PROJECT, PLATFORM, DATESTAMP
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
+# arguments: RELEASE_BASE, PROJECT, PLATFORM
 # author : Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>, Emil Obreshkov <Emil.Obreshkov@cern.ch>

 echo "INFO: Script executed by $(whoami) on $(date)"
@@ -8,59 +8,65 @@ echo "INFO: Script executed by $(whoami) on $(date)"
 RELEASE_BASE=$1
 PROJECT=$2
 PLATFORM=$3
-DATESTAMP=$4

-echo Release base $RELEASE_BASE
-echo Project $PROJECT
-echo Platform $PLATFORM
-echo Date $DATESTAMP
-
-BRANCH=`echo $RELEASE_BASE |tr "/" " " |awk '{print $5}'`
-
-# setup for the build
-[[ "${ATLAS_LOCAL_ROOT_BASE}" = "" ]] && export ATLAS_LOCAL_ROOT_BASE="/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase"
-source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh --quiet
-lsetup asetup
-asetup none,cmakesetup --platform ${PLATFORM}
+BRANCH="$(echo "${RELEASE_BASE}" | tr '/' ' ' | awk '{print $5}')"
+echo BRANCH "${BRANCH}"

 if [ -d /cvmfs/atlas.cern.ch/repo/sw/tdaq ]; then
     echo "WARNING: Setting TDAQ_RELEASE_BASE to /cvmfs/atlas.cern.ch/repo/sw/tdaq"
     export TDAQ_RELEASE_BASE=/cvmfs/atlas.cern.ch/repo/sw/tdaq
 else
-    echo "Error: Cannot find TDAQ software installation"
+    echo "ERROR: Cannot find TDAQ software installation"
     return 1
 fi

-source ${RELEASE_BASE}/build/install/${PROJECT}/*/InstallArea/${PLATFORM}/setup.sh
-# setup as if asetup was run
-export AtlasBuildBranch=$BRANCH
-export AtlasProject=$PROJECT
-export ${AtlasProject}_PLATFORM=$PLATFORM
-export AtlasBuildStamp=$DATESTAMP
+export ATLAS_LOCAL_ROOT_BASE="${ATLAS_LOCAL_ROOT_BASE:-/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase}"
+# shellcheck source=/dev/null
+source "${ATLAS_LOCAL_ROOT_BASE}"/user/atlasLocalSetup.sh --quiet
+if [ "${BRANCH}" == "master" ]; then
+    lsetup -a testing asetup
+    echo "INFO: setting up for master"
+else
+    lsetup -a current asetup
+    echo "INFO: setting up for ${BRANCH}"
+fi
+asetup "${PROJECT}" --platform "${PLATFORM}" --releasebase "${RELEASE_BASE}"/build/install --noLcgReleaseBase
+
+
+# set AtlasBuildBranch since it is not set by the above asetup for the local build setup
+export AtlasBuildBranch=${BRANCH}
+# for nightly testing point AtlasVersion to AtlasBuildStamp
+export AtlasVersion="${AtlasBuildStamp}"
+
+echo "TDAQ_RELEASE_BASE = ${TDAQ_RELEASE_BASE}"
+echo "AtlasBuildBranch  = ${AtlasBuildBranch}"
+echo "AtlasProject      = ${AtlasProject}"
+echo "AtlasBuildStamp   = ${AtlasBuildStamp}"
+echo "AtlasVersion      = ${AtlasVersion}"

-ART_DIRECTORY=`which art.py`
-ART_VERSION=`art.py --version`
+ART_DIRECTORY=$(command -v art.py)
+ART_VERSION=$(art.py --version)
 echo "INFO: Using ART version ${ART_VERSION} in ${ART_DIRECTORY} directory"

 # run build tests
-SUBDIR=${BRANCH}/${PROJECT}/${PLATFORM}/${DATESTAMP}
+SUBDIR=${AtlasBuildBranch}/${AtlasProject}/${PLATFORM}/${AtlasBuildStamp}
 OUTDIR="${RELEASE_BASE}/art-build/${SUBDIR}"
-CMD="art.py run ${RELEASE_BASE}/athena ${OUTDIR}"
-echo ${CMD}
-RESULT=`eval "${CMD}"`
-echo ${RESULT}
+CMD="art.py run ${RELEASE_BASE}/build/install/${AtlasProject}/*/InstallArea/${PLATFORM}/src ${OUTDIR}"
+echo "${CMD}"
+RESULT=$(eval "${CMD}")
+echo "${RESULT}"

 # copy the test results to EOS area
 if [ -z "${EOS_MGM_URL}" ]; then
     echo "WARNING: EOS_MGM_URL variable is empty, setting it to root://eosatlas.cern.ch"
-   export EOS_MGM_URL="root://eosatlas.cern.ch"
+    export EOS_MGM_URL="root://eosatlas.cern.ch"
 else
     echo "EOS_MGM_URL variable contains", ${EOS_MGM_URL}
 fi

 TARGETDIR=/eos/atlas/atlascerngroupdisk/data-art/build-output/${SUBDIR}
 if [[ ! -e ${TARGETDIR} ]]; then
-   echo Target directory ${TARGETDIR}
-   eos mkdir -p ${TARGETDIR}
-   xrdcp -vr ${OUTDIR} ${TARGETDIR}
+    echo Target directory "${TARGETDIR}"
+    eos mkdir -p "${TARGETDIR}"
+    xrdcp -vr "${OUTDIR}" "${TARGETDIR}"
 fi
diff --git a/Tools/ART/scripts/art-task-grid.sh b/Tools/ART/scripts/art-task-grid.sh
index ad7be6ed95e0a2b1d4e9657e1a35d5ce89c0b790..0ad216bbc9b905e9f4067958dae4966ae1e3b530 100755
--- a/Tools/ART/scripts/art-task-grid.sh
+++ b/Tools/ART/scripts/art-task-grid.sh
@@ -1,103 +1,171 @@
 #!/bin/bash
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
 #
 # NOTE do NOT run with /bin/bash -x as the output is too big for gitlab-ci
-# arguments: [options] SUBMIT_DIRECTORY SCRIPT_DIRECTORY PACKAGE SEQUENCE_TAG SPLIT NIGHTLY_RELEASE_SHORT PROJECT PLATFORM NIGHTLY_TAG OUT_FILE
+#
+# Example command lines for the two types:
+#
+# art-task-grid.sh [--no-action] batch <submit_directory> <script_directory> <sequence_tag> <package> <outfile> <job_type> <number_of_tests>
+#
+# art-task-grid.sh [--no-action] single [--inds <input_file> --n-files <number_of_files> --split <split>] <submit_directory> <script_directory> <sequence_tag> <package> <outfile> <job_name>
+#
 # env: ART_GRID_OPTIONS
 #
 # author : Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>
 #
-# options have to be in-order
+# options have to be in-order, and at the correct place
 #
-# example: [--skip-setup --test-name TestName --inDS user.tcuhadar.SingleMuon... --nFiles 3 --nEventsPerFile 5] tmp /cvmfs/atlas-nightlies.cern.ch/sw/... Tier0ChainTests grid 316236 3 21.0 Athena x86_64-slc6-gcc62-opt 2017-02-26T2119 user.${USER}.atlas.${NIGHTLY_RELEASE_SHORT}.${PROJECT}.${PLATFORM}.${NIGHTLY_TAG}.${SEQUENCE_TAG}.${PACKAGE}[.${TEST_NUMBER}]
-#set -e
+# example: [--test-name TestName --inDS user.tcuhadar.SingleMuon... --nFiles 3 --in] tmp /cvmfs/atlas-nightlies.cern.ch/sw/... Tier0ChainTests grid 316236 3 user.${USER}.atlas.${NIGHTLY_RELEASE_SHORT}.${PROJECT}.${PLATFORM}.${NIGHTLY_TAG}.${SEQUENCE_TAG}.${PACKAGE}[.${TEST_NUMBER}]
+set -e

-echo "Script executed by $(whoami) on $(date)"
+echo "art-task-grid.sh executed by $(whoami) on $(date)"

-SKIP_SETUP=0
-if [ $1 == "--skip-setup" ]; then
-    SKIP_SETUP=1
-    shift
-fi
-TYPE_OPTION="batch %RNDM:0"
-PATHENA_OPTIONS="--destSE=CERN-PROD_SCRATCHDISK"
-PATHENA_TYPE_OPTIONS=""
-if [ $1 == "--test-name" ]; then
-    TYPE_OPTION="single $2"
-    PATHENA_TYPE_OPTIONS="--forceStaged"
-    shift
-    shift
-fi
-INDS=""
-if [ $1 == "--inDS" ]; then
-    INDS="--inDS $2"
-    shift
-    shift
-fi
-NFILES=""
-if [ $1 == "--nFiles" ]; then
-    NFILES="--nFiles $2"
-    shift
-    shift
+NO_ACTION=0
+if [ "$1" == "--no-action" ]; then
+    NO_ACTION=1
+    shift
+    echo "NO_ACTION=${NO_ACTION}"
 fi
+
+TYPE=$1
+shift
+echo "TYPE=${TYPE}"
+
+case ${TYPE} in
+
+    'batch')
+        echo "Running 'batch'"
+        SPLIT=""
+        ;;
+    'single')
+        echo "Running 'single'"
+        INDS=""
+        if [ "$1" == "--inds" ]; then
+            INDS="--inDS $2"
+            shift
+            shift
+        fi
+        NFILES=""
+        NFILES_PER_JOB=""
+        if [ "$1" == "--n-files" ]; then
+            NFILES="--nFiles $2"
+            NFILES_PER_JOB="--nFilesPerJob $2"
+            shift
+            shift
+        fi
+        SPLIT=""
+        LARGE_JOB="--long --memory 4096"
+        if [ "$1" == "--split" ]; then
+            SPLIT="--split $2"
+            NFILES_PER_JOB=""
+            LARGE_JOB=""
+            shift
+            shift
+        fi
+        IN_FILE=""
+        if [ "$1" == "--in" ]; then
+            IN_FILE="--in=%IN"
+            shift
+        fi
+        NCORES=""
+        if [ "$1" == "--ncore" ]; then
+            NCORES="--nCore $2"
+            NFILES_PER_JOB=""
+            shift
+            shift
+        fi
+        ;;
+    *)
+        echo "Unknown TYPE: ${TYPE}"
+        exit 1
+        ;;
+esac
+
 SUBMIT_DIRECTORY=$1
 shift
+echo "SUBMIT_DIRECTORY=${SUBMIT_DIRECTORY}"
+
 SCRIPT_DIRECTORY=$1
 shift
-PACKAGE=$1
-shift
-TYPE=$1
-shift
+echo "SCRIPT_DIRECTORY=${SCRIPT_DIRECTORY}"
+
 SEQUENCE_TAG=$1
 shift
-SPLIT=$1
-shift
-NIGHTLY_RELEASE_SHORT=$1
-shift
-PROJECT=$1
-shift
-PLATFORM=$1
-shift
-NIGHTLY_TAG=$1
+echo "SEQUENCE_TAG=${SEQUENCE_TAG}"
+
+PACKAGE=$1
 shift
+echo "PACKAGE=${PACKAGE}"
+
 OUTFILE=$1
 shift
+echo "OUTFILE=${OUTFILE}"

-# we seem to have to copy the env variables locally
-GRID_OPTIONS=$ART_GRID_OPTIONS
+case ${TYPE} in

-if [ ${SKIP_SETUP} -eq 0 ]; then
-    echo "Setting up release: ${PLATFORM} ${NIGHTLY_RELEASE_SHORT} ${NIGHTLY_TAG} ${PROJECT}"
-    USER=artprod
+    'batch')
+        JOB_TYPE=$1
+        shift
+        echo "JOB_TYPE=${JOB_TYPE}"

-    export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
-    source $ATLAS_LOCAL_ROOT_BASE/user/atlasLocalSetup.sh
+        NUMBER_OF_TESTS=$1
+        SPLIT="--split ${NUMBER_OF_TESTS}"
+        shift
+        echo "NUMBER_OF_TESTS=${NUMBER_OF_TESTS}"
+        echo "SPLIT=${SPLIT}"
+        ;;

-    export RUCIO_ACCOUNT=artprod
+    'single')
+        JOB_NAME=$1
+        shift
+        echo "JOB_NAME=${JOB_NAME}"
+        ;;
+esac

-    lsetup panda "asetup --platform=${PLATFORM} ${NIGHTLY_RELEASE_SHORT},${NIGHTLY_TAG},${PROJECT}"
+# general options
+PATHENA_OPTIONS="--destSE=CERN-PROD_SCRATCHDISK"
+OUT="%OUT.tar"

-    voms-proxy-init --rfc -noregen -cert ./grid.proxy -voms atlas
+# we seem to have to copy the env variables locally
+GRID_OPTIONS=$ART_GRID_OPTIONS
+echo "GRID_OPTIONS=${GRID_OPTIONS}"

-fi
-if [ ${SPLIT} -eq 0 ]; then
-    SPLIT=""
-else
-    SPLIT="--split ${SPLIT}"
-fi
+case ${TYPE} in
+
+    'batch')
+        # <script_directory> <sequence_tag> <package> <outfile> <job_type> <job_index>
+        INTERNAL_COMMAND="grid batch"
+        JOB_INDEX="%RNDM:0"
+        ARGS="${JOB_TYPE} ${JOB_INDEX}"
+        echo "JOB_INDEX=${JOB_INDEX}"
+        echo "ARGS=${ARGS}"
+        ;;
+    'single')
+        # <script_directory> <sequence_tag> <package> <outfile> <job_name>
+        INTERNAL_COMMAND="grid single"
+        PATHENA_TYPE_OPTIONS="${LARGE_JOB} ${INDS} ${NFILES} ${NFILES_PER_JOB} ${NCORES}"
+        ARGS="${JOB_NAME}"
+        echo "PATHENA_TYPE_OPTIONS=${PATHENA_TYPE_OPTIONS}"
+        echo "ARGS=${ARGS}"
+        ;;
+esac
+
 # NOTE: for art-internal.py the current dir can be used as it is copied there
-cd ${SUBMIT_DIRECTORY}/${PACKAGE}/run
-SUBCOMMAND="./art-internal.py job grid ${SCRIPT_DIRECTORY} ${PACKAGE} ${TYPE} ${SEQUENCE_TAG} ${TYPE_OPTION} %OUT.tar ${NIGHTLY_RELEASE_SHORT} ${PROJECT} ${PLATFORM} ${NIGHTLY_TAG}"
-CMD="pathena ${GRID_OPTIONS} ${PATHENA_OPTIONS} ${PATHENA_TYPE_OPTIONS} --noBuild --expertOnly_skipScout --trf \"${SUBCOMMAND}\" ${SPLIT} --outDS ${OUTFILE} --extOutFile art-job.json ${INDS} ${NFILES}"
+cd "${SUBMIT_DIRECTORY}"/"${PACKAGE}"/run
+SUBCOMMAND="./art-internal.py ${INTERNAL_COMMAND} ${IN_FILE} ${SCRIPT_DIRECTORY} ${SEQUENCE_TAG} ${PACKAGE} ${OUT} ${ARGS}"
+CMD="pathena ${GRID_OPTIONS} ${PATHENA_OPTIONS} ${PATHENA_TYPE_OPTIONS} --noBuild --expertOnly_skipScout --trf \"${SUBCOMMAND}\" ${SPLIT} --outDS ${OUTFILE} --extOutFile art-job.json"
 #--disableAutoRetry
 #--excludedSite=ANALY_TECHNION-HEP-CREAM
 #--site=ANALY_NIKHEF-ELPROD_SHORT,ANALY_NIKHEF-ELPROD"
 #--site=ANALY_FZK,ANALY_BNL,ANALY_RAL"
-echo ${CMD}
+echo "Command: ${CMD}"

-RESULT=`eval "${CMD}"`
-echo ${RESULT}
+if [ ${NO_ACTION} -ne 1 ]; then
+    echo "Submitting..."
+    RESULT=$(eval "${CMD}")
+    echo "${RESULT}"
+fi
diff --git a/Tools/ART/scripts/art.py b/Tools/ART/scripts/art.py
index 1bcf7ab382deab29a0ad4e86c0bd35d445d4de89..04b4c72f7598c1c330ce16863dda514709c04111 100755
--- a/Tools/ART/scripts/art.py
+++ b/Tools/ART/scripts/art.py
@@ -1,37 +1,42 @@
 #!/usr/bin/env python
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
 """
 ART - ATLAS Release Tester.

+You need to set up an ATLAS release before using ART.
+
 Usage:
   art.py run [-v -q --type=<T> --max-jobs=<N> --ci] <script_directory> <sequence_tag>
-  art.py grid [-v -q --type=<T> -n] <script_directory> <sequence_tag>
-  art.py submit [-v -q --type=<T> -n] <sequence_tag> <nightly_release> <project> <platform> <nightly_tag> [<package>]
-  art.py copy [-v -q --user=<user> --dst=<dir>] <nightly_release> <project> <platform> <nightly_tag> <package>
+  art.py grid [-v -q --type=<T> --max-jobs=<N> --config=<file> --copy -n] <script_directory> <sequence_tag>
+  art.py submit [-v -q --type=<T> --max-jobs=<N> --config=<file> -n] <sequence_tag> [<package>]
+  art.py copy [-v -q --user=<user> --dst=<dir>] <indexed_package>
   art.py validate [-v -q] <script_directory>
-  art.py included [-v -q --type=<T> --test-type=<TT>] <script_directory> [<nightly_release> <project> <platform>]
-  art.py compare grid [-v -q --days=<D> --user=<user>] <nightly_release> <project> <platform> <nightly_tag> <package> <test_name> <file_name>...
-  art.py compare ref [-v -q] <file_name> <ref_file>
-  art.py download [-v -q] <input_file>
-  art.py list grid [-v -q --user=<user> --json --type=<T> --test-type=<TT> --nogrid] <package> <nightly_release> <project> <platform> <nightly_tag>
-  art.py log grid [-v -q --user=<user>] <package> <test_name> <nightly_release> <project> <platform> <nightly_tag>
-  art.py output grid [-v -q --user=<user>] <package> <test_name> <nightly_release> <project> <platform> <nightly_tag>
+  art.py included [-v -q --type=<T> --test-type=<TT>] <script_directory>
+  art.py compare grid [-v -q --days=<D> --user=<user> --entries=<entries>] <package> <test_name>
+  art.py compare ref [-v -q --entries=<entries>] <path> <ref_path>
+  art.py list grid [-v -q --user=<user> --json --test-type=<TT>] <package>
+  art.py log grid [-v -q --user=<user>] <package> <test_name>
+  art.py output grid [-v -q --user=<user>] <package> <test_name>
+  art.py config [-v -q --config=<file>] [<package>]
+  art.py createpoolfile [-v -q]

 Options:
-  --ci              Run Continuous Integration tests only (using env: AtlasBuildBranch)
-  --days=<D>        Number of days ago to pick up reference for compare [default: 1]
-  --dst=<dir>       Destination directory for downloaded files
-  -h --help         Show this screen.
-  --json            Output in json format
-  --max-jobs=<N>    Maximum number of concurrent jobs to run [default: 0]
-  -n --no-action    No real submit will be done
-  --nogrid          Do not retrieve grid indices
-  -q --quiet        Show less information, only warnings and errors
-  --test-type=<TT>  Type of test (e.g. all, batch or single) [default: all]
-  --type=<T>        Type of job (e.g. grid, build)
-  --user=<user>     User to use for RUCIO
-  -v --verbose      Show more information, debug level
-  --version         Show version.
+  --ci                 Run Continuous Integration tests only (using env: AtlasBuildBranch)
+  --config=<file>      Use specific config file [default: art-configuration.yml]
+  --copy               Run the copy after running the jobs
+  --days=<D>           Number of days ago to pick up reference for compare [default: 1]
+  --dst=<dir>          Destination directory for downloaded files
+  --entries=<entries>  Number of entries to compare [default: 10]
+  -h --help            Show this screen.
+  --json               Output in json format
+  --max-jobs=<N>       Maximum number of concurrent jobs to run [default: 0]
+  -n --no-action       No real submit will be done
+  -q --quiet           Show less information, only warnings and errors
+  --test-type=<TT>     Type of test (e.g. all, batch or single) [default: all]
+  --type=<T>           Type of job (e.g. grid, build)
+  --user=<user>        User to use for RUCIO
+  -v --verbose         Show more information, debug level
+  --version            Show version.

 Sub-commands:
   run               Run jobs from a package in a local build (needs release and grid setup)
@@ -41,28 +46,34 @@ Sub-commands:
   validate          Check headers in tests
   included          Show list of files which will be included for art submit/art grid
   compare           Compare the output of a job
-  download          Download a file from rucio
   list              List the jobs of a package
   log               Show the log of a job
   output            Get the output of a job
+  config            Show configuration
+  createpoolfile    Create an 'empty' poolfile catalog

 Arguments:
-  file_name         Filename to save the output to
-  index             Index of the test inside the package
-  input_file        Input file to download (e.g. CONTAINER_ID:ENTRY_NAME)
-  nightly_release   Name of the nightly release (e.g. 21.0)
-  nightly_tag       Nightly tag (e.g. 2017-02-26T2119)
-  out               Tar filename used for the output of the job
+  indexed_package   Package of the test or indexed package (e.g. MooPerformance.4)
   package           Package of the test (e.g. Tier0ChainTests)
-  platform          Platform (e.g. x86_64-slc6-gcc62-opt)
-  project           Name of the project (e.g. Athena)
+  path              Directory or File to compare
+  ref_path          Directory or File to compare to
   script_directory  Directory containing the package(s) with tests
   sequence_tag      Sequence tag (e.g. 0 or PIPELINE_ID)
   test_name         Name of the test inside the package (e.g. test_q322.sh)
+
+Environment:
+  AtlasBuildBranch         Name of the nightly release (e.g. 21.0)
+  AtlasProject             Name of the project (e.g. Athena)
+  <AtlasProject>_PLATFORM  Platform (e.g. x86_64-slc6-gcc62-opt)
+  AtlasBuildStamp          Nightly tag (e.g. 2017-02-26T2119)
+
+Tests are called with:
+  arguments: PACKAGE TEST_NAME SCRIPT_DIRECTORY TYPE [IN_FILE]
+  environment: ArtScriptDirectory, ArtPackage, ArtJobType, ArtJobName, [ArtInFile]
 """

 __author__ = "Tulay Cuhadar Donszelmann <tcuhadar@cern.ch>"

-__version__ = '0.6.10'
+__version__ = '0.9.5'

 import logging
 import os
@@ -72,7 +83,7 @@ from ART.docopt_dispatch import dispatch

 from ART import ArtBase, ArtGrid, ArtBuild

-from ART.art_misc import set_log
+from ART.art_misc import get_atlas_env, set_log

 MODULE = "art"
@@ -82,116 +93,107 @@ MODULE = "art"


 @dispatch.on('compare', 'ref')
-def compare_ref(file_name, ref_file, **kwargs):
+def compare_ref(path, ref_path, **kwargs):
     """Compare the output of a job."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    exit(ArtBase(art_directory).compare_ref(file_name, ref_file))
+    entries = kwargs['entries']
+    exit(ArtBase(art_directory).compare_ref(path, ref_path, entries))


 @dispatch.on('compare', 'grid')
-def compare_grid(package, test_name, nightly_release, project, platform, nightly_tag, **kwargs):
+def compare_grid(package, test_name, **kwargs):
     """Compare the output of a job."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     days = int(kwargs['days'])
-    file_names = kwargs['file_name']
+    entries = kwargs['entries']
     user = kwargs['user']
-    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).compare(package, test_name, days, file_names, user))
+    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).compare(package, test_name, days, user, entries=entries, shell=True))


 @dispatch.on('list', 'grid')
-def list(package, nightly_release, project, platform, nightly_tag, **kwargs):
+def list(package, **kwargs):
     """List the jobs of a package."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    job_type = 'grid' if kwargs['type'] is None else kwargs['type']
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
+    job_type = 'grid'
     index_type = kwargs['test_type']
     json_format = kwargs['json']
     user = kwargs['user']
-    nogrid = kwargs['nogrid']
-    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).list(package, job_type, index_type, json_format, user, nogrid))
+    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).list(package, job_type, index_type, json_format, user))


 @dispatch.on('log', 'grid')
-def log(package, test_name, nightly_release, project, platform, nightly_tag, **kwargs):
+def log(package, test_name, **kwargs):
     """Show the log of a job."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     user = kwargs['user']
     exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).log(package, test_name, user))


 @dispatch.on('output', 'grid')
-def output(package, test_name, nightly_release, project, platform, nightly_tag, **kwargs):
+def output(package, test_name, **kwargs):
     """Get the output of a job."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     user = kwargs['user']
     exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).output(package, test_name, user))


 @dispatch.on('submit')
-def submit(sequence_tag, nightly_release, project, platform, nightly_tag, **kwargs):
+def submit(sequence_tag, **kwargs):
     """Submit nightly jobs to the grid, NOT for users."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     job_type = 'grid' if kwargs['type'] is None else kwargs['type']
     package = kwargs['package']
+    config = kwargs['config']
     no_action = kwargs['no_action']
     wait_and_copy = True
-    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).task_list(job_type, sequence_tag, package, no_action, wait_and_copy))
+    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag, max_jobs=int(kwargs['max_jobs'])).task_list(job_type, sequence_tag, package, no_action, wait_and_copy, config))


 @dispatch.on('grid')
 def grid(script_directory, sequence_tag, **kwargs):
     """Run jobs from a package on the grid, needs release and grid setup."""
     set_log(kwargs)
-    log = logging.getLogger(MODULE)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    try:
-        nightly_release = os.environ['AtlasBuildBranch']
-        project = os.environ['AtlasProject']
-        platform = os.environ[project + '_PLATFORM']
-        nightly_tag = os.environ['AtlasBuildStamp']
-    except KeyError, e:
-        log.critical("Environment variable not set %s", e)
-        sys.exit(1)
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     job_type = 'grid' if kwargs['type'] is None else kwargs['type']
     package = None
+    config = kwargs['config']
     no_action = kwargs['no_action']
-    wait_and_copy = False
-    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag, script_directory, True).task_list(job_type, sequence_tag, package, no_action, wait_and_copy))
+    wait_and_copy = kwargs['copy']
+    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag, script_directory=script_directory, skip_setup=True, max_jobs=int(kwargs['max_jobs'])).task_list(job_type, sequence_tag, package, no_action, wait_and_copy, config))


 @dispatch.on('run')
 def run(script_directory, sequence_tag, **kwargs):
     """Run jobs from a package in a local build, needs release and grid setup."""
     set_log(kwargs)
-    log = logging.getLogger(MODULE)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    try:
-        nightly_release = os.environ['AtlasBuildBranch']
-        project = os.environ['AtlasProject']
-        platform = os.environ[project + '_PLATFORM']
-        nightly_tag = os.environ['AtlasBuildStamp']
-    except KeyError, e:
-        log.critical("Environment variable not set %s", e)
-        sys.exit(1)
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     job_type = 'build' if kwargs['type'] is None else kwargs['type']
     exit(ArtBuild(art_directory, nightly_release, project, platform, nightly_tag, script_directory, max_jobs=int(kwargs['max_jobs']), ci=kwargs['ci']).task_list(job_type, sequence_tag))


 @dispatch.on('copy')
-def copy(nightly_release, project, platform, nightly_tag, **kwargs):
+def copy(indexed_package, **kwargs):
     """Copy outputs to eos area."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    package = kwargs['package']
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
     # NOTE: default depends on USER, not set it here but in ArtGrid.copy
     dst = kwargs['dst']
     user = kwargs['user']
-    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).copy(package, dst, user))
+    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).copy(indexed_package, dst=dst, user=user))


 @dispatch.on('validate')
@@ -207,20 +209,29 @@ def included(script_directory, **kwargs):
     """Show list of files which will be included for art submit/art grid."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    nightly_release = os.environ['AtlasBuildBranch'] if kwargs['nightly_release'] is None else kwargs['nightly_release']
-    project = os.environ['AtlasProject'] if kwargs['project'] is None else kwargs['project']
-    platform = os.environ[project + '_PLATFORM'] if kwargs['platform'] is None else kwargs['platform']
-    art_type = 'grid' if kwargs['type'] is None else kwargs['type']
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
+    job_type = kwargs['type']  # None will list all types
     index_type = kwargs['test_type']
-    exit(ArtBase(art_directory).included(script_directory, art_type, index_type, nightly_release, project, platform))
+    exit(ArtBase(art_directory).included(script_directory, job_type, index_type, nightly_release, project, platform))
+
+
+@dispatch.on('config')
+def config(package, **kwargs):
+    """Show configuration."""
+    set_log(kwargs)
+    art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
+    config = kwargs['config']
+    exit(ArtBase(art_directory).config(package, nightly_release, project, platform, config))


-@dispatch.on('download')
-def download(input_file, **kwargs):
-    """Download a file from rucio."""
+@dispatch.on('createpoolfile')
+def createpoolfile(package, **kwargs):
+    """Create an 'empty' poolfile catalog."""
     set_log(kwargs)
     art_directory = os.path.dirname(os.path.realpath(sys.argv[0]))
-    exit(ArtBase(art_directory).download(input_file))
+    (nightly_release, project, platform, nightly_tag) = get_atlas_env()
+    exit(ArtGrid(art_directory, nightly_release, project, platform, nightly_tag).createpoolfile())


 if __name__ == '__main__':