diff --git a/src/LHCbDIRAC/Core/Utilities/RunApplication.py b/src/LHCbDIRAC/Core/Utilities/RunApplication.py index 54ccc7ec0c44eb9ff6fbdf0dbaf0e92e85b7d784..348e10fc6a5c5a5028cdcc6950e21161a88d6ec0 100644 --- a/src/LHCbDIRAC/Core/Utilities/RunApplication.py +++ b/src/LHCbDIRAC/Core/Utilities/RunApplication.py @@ -43,6 +43,7 @@ class RunApplication: self.log = gLogger.getSubLogger("RunApplication") self.applicationName = gaudiAppModule.applicationName + self.cleanedApplicationName = gaudiAppModule.cleanedApplicationName self.applicationVersion = gaudiAppModule.applicationVersion self.prmonPath = "/cvmfs/lhcb.cern.ch/lib/experimental/prmon/bin/prmon" self.usePrmon = gaudiAppModule.usePrmon @@ -51,7 +52,7 @@ class RunApplication: raise LHCbDIRACError(f"gaudiAppModule.executable={gaudiAppModule.executable!r} is not supported") self.prodConfFileName = "prodConf_{}_{}_{}_{}.json".format( - self.applicationName, + self.cleanedApplicationName, gaudiAppModule.production_id, gaudiAppModule.prod_job_id, gaudiAppModule.step_number, diff --git a/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py b/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py index b6f322d8fdebad943934afc7ba1ba538136b9a06..bc799080c43b9eaa3710b1eaaf16521443899c16 100644 --- a/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py +++ b/src/LHCbDIRAC/Workflow/Modules/BookkeepingReport.py @@ -165,7 +165,7 @@ class BookkeepingReport(ModuleBase): except KeyError: self.log.warn("XML Summary file name not found, will try to guess it") xmlSummaryFile = "summary{}_{}_{}_{}.xml".format( - self.applicationName, + self.cleanedApplicationName, self.production_id, self.prod_job_id, self.step_number, @@ -173,13 +173,13 @@ class BookkeepingReport(ModuleBase): self.log.warn(f"Trying {xmlSummaryFile}") if xmlSummaryFile not in os.listdir("."): self.log.warn(f"XML Summary file {xmlSummaryFile} not found, will try to guess a second time") - xmlSummaryFile = f"summary{self.applicationName}_{self.step_id}.xml" + xmlSummaryFile = f"summary{self.cleanedApplicationName}_{self.step_id}.xml" self.log.warn(f"Trying {xmlSummaryFile}") if xmlSummaryFile not in os.listdir("."): self.log.warn( f"XML Summary file {xmlSummaryFile} not found, will try to guess a third and last time" ) - xmlSummaryFile = f"summary{self.applicationName}_{self.step_number}.xml" + xmlSummaryFile = f"summary{self.cleanedApplicationName}_{self.step_number}.xml" self.log.warn(f"Trying {xmlSummaryFile}") try: self.xf_o = XMLSummary(xmlSummaryFile) @@ -396,6 +396,7 @@ class BookkeepingReport(ModuleBase): typeVersion = "0" # PROTECTION for old production XMLs + # TODO: think about removing this! if typeName.upper() == "HIST": typeName = f"{self.applicationName.upper()}HIST" diff --git a/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py b/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py index 1b2db120aba818182e49544b9df0e76d3c342711..b83d1df77566edbedc58bd729a944ad271fff419 100644 --- a/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py +++ b/src/LHCbDIRAC/Workflow/Modules/ModuleBase.py @@ -20,6 +20,7 @@ from DIRAC.Core.Security.ProxyInfo import getProxyInfo from DIRAC.Core.Utilities.Adler import fileAdler from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations from DIRAC.Resources.Catalog.PoolXMLFile import getGUID +from DIRAC.Resources.Catalog.PoolXMLCatalog import PoolXMLCatalog from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport from DIRAC.TransformationSystem.Client.FileReport import FileReport from DIRAC.RequestManagementSystem.Client.Request import Request @@ -32,6 +33,7 @@ import LHCbDIRAC from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient from LHCbDIRAC.Core.Utilities.ProductionData import getLogPath, constructProductionLFNs from LHCbDIRAC.Workflow.Modules.ModulesUtilities import getNumberOfProcessorsToUse +from LHCbDIRAC.Core.Utilities.File import makeGuid class ModuleBase: @@ -131,6 +133,10 @@ class ModuleBase: self.stepProcPass = None self.outputFilePrefix = "" + @property + def cleanedApplicationName(self): + return self.applicationName.replace("/", "") + ############################################################################# def execute( @@ -372,9 +378,9 @@ class ModuleBase: # this is only for production jobs and for application steps if prodID and jobID and stepInstanceNumber and "listoutput" in self.step_commons: self.outputFilePrefix = f"{prodID}_{jobID}_{stepInstanceNumber}" - self.applicationLog = self.applicationName + "_" + self.outputFilePrefix + ".log" - self.XMLSummary = "summary" + self.applicationName + "_" + self.outputFilePrefix + ".xml" - self.histoName = self.applicationName + "_" + self.outputFilePrefix + ".Hist.root" + self.applicationLog = self.cleanedApplicationName + "_" + self.outputFilePrefix + ".log" + self.XMLSummary = "summary" + self.cleanedApplicationName + "_" + self.outputFilePrefix + ".xml" + self.histoName = self.cleanedApplicationName + "_" + self.outputFilePrefix + ".Hist.root" for fileTypeDict in self.step_commons["listoutput"]: # this is a dict like {'outputDataType': 'sim'} # for non histo-merging prods @@ -818,8 +824,15 @@ class ModuleBase: stepIndex = self.gaudiSteps.index(self.stepName) previousStep = self.gaudiSteps[stepIndex - 1] + outputList = self.workflow_commons.get("outputList", []) + if not outputList: + raise RuntimeError( + f"outputList was empty while finding input data for step {self.stepName} - " + "did the first step produce any output at all?" + ) + stepInputData = [] - for outputF in self.workflow_commons["outputList"]: + for outputF in outputList: try: if ( outputF["stepName"] == previousStep @@ -932,7 +945,28 @@ class ModuleBase: "stepName": self.stepName, } ) - + try: + physFileName = fileFound["outputDataName"] + poolCat = PoolXMLCatalog(self.poolXMLCatName) + if not poolCat.getTypeByPfn(physFileName): + self.log.info("Output file not found in pool catalog. Will add it.", physFileName) + poolCat.addFile( + ( + physFileName, + physFileName, + None, + makeGuid(physFileName)[physFileName], + "ROOT_All", + ) + ) + poolCat.flush() + else: + self.log.info("Output file exists already in pool catalog. Won't touch.", physFileName) + except Exception: + self.log.exception( + "Couldn't add missing output file to pool catalog", + fileFound, + ) return (finalOutputs, bkFileTypes) #############################################################################