From 90d493389ff07e59423235f999b88789eff599a6 Mon Sep 17 00:00:00 2001 From: LHCbDIRAC Bot <dirac.bot@cern.ch> Date: Tue, 22 Oct 2024 22:22:39 +0200 Subject: [PATCH] Merge branch 'roneil-fix-runapp-module' into 'master' Support lb-conda apps in ModuleBase See merge request lhcb-dirac/LHCbDIRAC!1658 (cherry picked from commit 0f57592b5ef462e1f58b2b03f6ae6792890d3d98) e4e77a81 introduce "cleanedApplicationName" in ModuleBase and always use it when defining file paths. 8e155225 what if outputList is not defined (e.g. if step job failed?) 338fc6d6 Add output file to pool catalog if it's not already there. f4794ec4 is this enough? Co-authored-by: Chris Burr <christopher.burr@cern.ch> --- .../Core/Utilities/RunApplication.py | 3 +- .../Workflow/Modules/BookkeepingReport.py | 7 +-- src/LHCbDIRAC/Workflow/Modules/ModuleBase.py | 44 ++++++++++++++++--- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/LHCbDIRAC/Core/Utilities/RunApplication.py b/src/LHCbDIRAC/Core/Utilities/RunApplication.py index 54ccc7ec0c..348e10fc6a 100644 --- a/src/LHCbDIRAC/Core/Utilities/RunApplication.py +++ b/src/LHCbDIRAC/Core/Utilities/RunApplication.py @@ -43,6 +43,7 @@ class RunApplication: self.log = gLogger.getSubLogger("RunApplication") self.applicationName = gaudiAppModule.applicationName + self.cleanedApplicationName = gaudiAppModule.cleanedApplicationName self.applicationVersion = gaudiAppModule.applicationVersion self.prmonPath = "/cvmfs/lhcb.cern.ch/lib/experimental/prmon/bin/prmon" self.usePrmon = gaudiAppModule.usePrmon @@ -51,7 +52,7 @@ class RunApplication: raise LHCbDIRACError(f"gaudiAppModule.executable={gaudiAppModule.executable!r} is not supported") self.prodConfFileName = "prodConf_{}_{}_{}_{}.json".format( - self.applicationName, + self.cleanedApplicationName, gaudiAppModule.production_id, gaudiAppModule.prod_job_id, gaudiAppModule.step_number, diff --git a/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py b/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py index b6f322d8fd..bc799080c4 100644 --- a/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py +++ b/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py @@ -165,7 +165,7 @@ class BookkeepingReport(ModuleBase): except KeyError: self.log.warn("XML Summary file name not found, will try to guess it") xmlSummaryFile = "summary{}_{}_{}_{}.xml".format( - self.applicationName, + self.cleanedApplicationName, self.production_id, self.prod_job_id, self.step_number, @@ -173,13 +173,13 @@ class BookkeepingReport(ModuleBase): self.log.warn(f"Trying {xmlSummaryFile}") if xmlSummaryFile not in os.listdir("."): self.log.warn(f"XML Summary file {xmlSummaryFile} not found, will try to guess a second time") - xmlSummaryFile = f"summary{self.applicationName}_{self.step_id}.xml" + xmlSummaryFile = f"summary{self.cleanedApplicationName}_{self.step_id}.xml" self.log.warn(f"Trying {xmlSummaryFile}") if xmlSummaryFile not in os.listdir("."): self.log.warn( f"XML Summary file {xmlSummaryFile} not found, will try to guess a third and last time" ) - xmlSummaryFile = f"summary{self.applicationName}_{self.step_number}.xml" + xmlSummaryFile = f"summary{self.cleanedApplicationName}_{self.step_number}.xml" self.log.warn(f"Trying {xmlSummaryFile}") try: self.xf_o = XMLSummary(xmlSummaryFile) @@ -396,6 +396,7 @@ class BookkeepingReport(ModuleBase): typeVersion = "0" # PROTECTION for old production XMLs + # TODO: think about removing this! if typeName.upper() == "HIST": typeName = f"{self.applicationName.upper()}HIST" diff --git a/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py b/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py index 1b2db120ab..b83d1df775 100644 --- a/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py +++ b/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py @@ -20,6 +20,7 @@ from DIRAC.Core.Security.ProxyInfo import getProxyInfo from DIRAC.Core.Utilities.Adler import fileAdler from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations from DIRAC.Resources.Catalog.PoolXMLFile import getGUID +from DIRAC.Resources.Catalog.PoolXMLCatalog import PoolXMLCatalog from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport from DIRAC.TransformationSystem.Client.FileReport import FileReport from DIRAC.RequestManagementSystem.Client.Request import Request @@ -32,6 +33,7 @@ import LHCbDIRAC from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient from LHCbDIRAC.Core.Utilities.ProductionData import getLogPath, constructProductionLFNs from LHCbDIRAC.Workflow.Modules.ModulesUtilities import getNumberOfProcessorsToUse +from LHCbDIRAC.Core.Utilities.File import makeGuid class ModuleBase: @@ -131,6 +133,10 @@ class ModuleBase: self.stepProcPass = None self.outputFilePrefix = "" + @property + def cleanedApplicationName(self): + return self.applicationName.replace("/", "") + ############################################################################# def execute( @@ -372,9 +378,9 @@ class ModuleBase: # this is only for production jobs and for application steps if prodID and jobID and stepInstanceNumber and "listoutput" in self.step_commons: self.outputFilePrefix = f"{prodID}_{jobID}_{stepInstanceNumber}" - self.applicationLog = self.applicationName + "_" + self.outputFilePrefix + ".log" - self.XMLSummary = "summary" + self.applicationName + "_" + self.outputFilePrefix + ".xml" - self.histoName = self.applicationName + "_" + self.outputFilePrefix + ".Hist.root" + self.applicationLog = self.cleanedApplicationName + "_" + self.outputFilePrefix + ".log" + self.XMLSummary = "summary" + self.cleanedApplicationName + "_" + self.outputFilePrefix + ".xml" + self.histoName = self.cleanedApplicationName + "_" + self.outputFilePrefix + ".Hist.root" for fileTypeDict in self.step_commons["listoutput"]: # this is a dict like {'outputDataType': 'sim'} # for non histo-merging prods @@ -818,8 +824,15 @@ class ModuleBase: stepIndex = self.gaudiSteps.index(self.stepName) previousStep = self.gaudiSteps[stepIndex - 1] + outputList = self.workflow_commons.get("outputList", []) + if not outputList: + raise RuntimeError( + f"outputList was empty while finding input data for step {self.stepName} - " + "did the first step produce any output at all?" + ) + stepInputData = [] - for outputF in self.workflow_commons["outputList"]: + for outputF in outputList: try: if ( outputF["stepName"] == previousStep @@ -932,7 +945,28 @@ class ModuleBase: "stepName": self.stepName, } ) - + try: + physFileName = fileFound["outputDataName"] + poolCat = PoolXMLCatalog(self.poolXMLCatName) + if not poolCat.getTypeByPfn(physFileName): + self.log.info("Output file not found in pool catalog. Will add it.", physFileName) + poolCat.addFile( + ( + physFileName, + physFileName, + None, + makeGuid(physFileName)[physFileName], + "ROOT_All", + ) + ) + poolCat.flush() + else: + self.log.info("Output file exists already in pool catalog. Won't touch.", physFileName) + except Exception: + self.log.exception( + "Couldn't add missing output file to pool catalog", + fileFound, + ) return (finalOutputs, bkFileTypes) ############################################################################# -- GitLab