diff --git a/python/LbNightlyTools/BuildMethods.py b/python/LbNightlyTools/BuildMethods.py index 56a96fbdcd85d9521d55ee0c43c3d23605d1bc79..b3f1424554f77708ac8979b8a21d46086f1939d4 100644 --- a/python/LbNightlyTools/BuildMethods.py +++ b/python/LbNightlyTools/BuildMethods.py @@ -1,5 +1,5 @@ ############################################################################### -# (c) Copyright 2013 CERN # +# (c) Copyright 2013-2023 CERN # # # # This software is distributed under the terms of the GNU General Public # # Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". # @@ -20,8 +20,8 @@ __author__ = "Marco Clemencic <marco.clemencic@cern.ch>" import logging import os import re -import shutil from datetime import datetime +from time import sleep from LbNightlyTools.Utils import compatible_lcg_external_files, find_path from LbNightlyTools.Utils import log_call as _log_call @@ -139,7 +139,22 @@ def log_call(cmd, *args, **kwargs): cwd=kwargs.get("cwd", os.getcwd()), ) - return _log_call(cmd, *args, **kwargs) + # Workaround for https://gitlab.cern.ch/lhcb-core/LbNightlyTools/-/issues/119 + for attempt in range(5): + if attempt != 0: + sleep(10) + result = _log_call(cmd, *args, **kwargs) + if ( + b"Failed to get file information for file descriptor 3" + not in result["stdout"] + ): + if attempt != 0: + __log__.debug("apptainer successfully started on attempt %d", attempt) + break + __log__.debug("apptainer failed to start on attempt %d", attempt) + else: + __log__.warning("giving up after repeated failures of apptainer") + return result def ensure_dir(path): @@ -244,7 +259,7 @@ class make(object): target, started.isoformat(), " ".join(quote(a) for a in cmd), - result["stdout"], + result["stdout"].decode("utf-8", errors="replace"), completed.isoformat(), ) .encode("utf-8") diff --git a/python/LbNightlyTools/tests/test_apptainer.py b/python/LbNightlyTools/tests/test_apptainer.py new file mode 100644 index 
###############################################################################
# (c) Copyright 2023 CERN                                                     #
#                                                                             #
# This software is distributed under the terms of the GNU General Public      #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING".   #
#                                                                             #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization  #
# or submit itself to any jurisdiction.                                       #
###############################################################################
import logging

import pytest

import LbNightlyTools.BuildMethods as bm

# Output that bm.log_call treats as a failed apptainer start, see
# https://gitlab.cern.ch/lhcb-core/LbNightlyTools/-/issues/119
FAILURE_OUTPUT = b"Failed to get file information for file descriptor 3\n"
# Any output that does not contain the failure marker counts as a success.
SUCCESS_OUTPUT = b"all good!\n"


class mock_apptainer_call:
    """
    Stand-in for ``bm._log_call`` that fails a fixed number of times.

    The first ``max_failures`` invocations return the failure output, every
    later invocation returns the success output.  ``n_of_invocations`` records
    how many times the mock has been called so that tests can check the number
    of attempts made by ``bm.log_call``.
    """

    def __init__(self, max_failures):
        self.max_failures = max_failures
        self.n_of_invocations = 0

    def __call__(self, *_args, **_kwargs):
        # arguments are ignored: only the number of calls drives the outcome
        self.n_of_invocations += 1
        if self.n_of_invocations <= self.max_failures:
            return {"stdout": FAILURE_OUTPUT}
        return {"stdout": SUCCESS_OUTPUT}


@pytest.mark.parametrize(
    "max_failures,expected_message,expected_calls,expected_stdout",
    [
        # one failure, then success on the first retry
        (
            1,
            "apptainer successfully started on attempt 1",
            2,
            SUCCESS_OUTPUT,
        ),
        # more failures than attempts: log_call gives up after 5 attempts and
        # hands back the last (failed) result
        (
            10,
            "giving up after repeated failures of apptainer",
            5,
            FAILURE_OUTPUT,
        ),
    ],
)
def test_workaroud(
    monkeypatch,
    caplog,
    max_failures,
    expected_message,
    expected_calls,
    expected_stdout,
):
    """
    Check the retry logic of ``bm.log_call``: the logged messages, the number
    of attempts and the result handed back to the caller.

    https://gitlab.cern.ch/lhcb-core/LbNightlyTools/-/issues/119
    """
    # keep a handle on the mock so that the number of attempts can be checked
    fake_log_call = mock_apptainer_call(max_failures)

    # avoid sleep time between retries
    monkeypatch.setattr(bm, "sleep", lambda _: None)
    # avoid running anything and pretend we ran apptainer and that fails a number of times
    monkeypatch.setattr(bm, "_log_call", fake_log_call)
    # avoid wrapping the command with apptainer
    monkeypatch.setenv("BINARY_TAG", "dummy")
    # capture DEBUG logging messages
    caplog.set_level(logging.DEBUG)

    result = bm.log_call(["true"])

    assert "apptainer failed to start on attempt 0" in caplog.text
    assert expected_message in caplog.text
    # retries stop at the first success and are capped otherwise
    assert fake_log_call.n_of_invocations == expected_calls
    # the caller gets the result of the last attempt
    assert result["stdout"] == expected_stdout