From 11e8a420c04819587b98009c700329a86825b9a9 Mon Sep 17 00:00:00 2001 From: Federico Stagni <federico.stagni@cern.ch> Date: Fri, 5 Apr 2024 15:06:18 +0200 Subject: [PATCH] Merge branch 'prod-id-to-path' into 'master' [master] Add BookkeepingClient.getOutputPathsForProdID See merge request lhcb-dirac/LHCbDIRAC!1543 (cherry picked from commit d964d21ac39ea901ffa126707ebc88e3805835fe) 82a0c617 doc: Clarify export_getProductionInformation 92b15705 refactor: Modernise NewOracleBookkeepingDB 31bd5c47 feat: Add BookkeepingClient.getOutputPathsForProdID --- .../DB/LegacyOracleBookkeepingDB.py | 4 ++- .../DB/NewOracleBookkeepingDB.py | 35 +++++++++++-------- .../Service/BookkeepingManagerHandler.py | 35 +++++++++++++++---- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py b/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py index df37bce6fc..3ce021541c 100644 --- a/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py +++ b/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py @@ -3275,8 +3275,10 @@ AND files.qualityid= dataquality.qualityid" def getProductionNbOfEvents(self, prodid): """Number of event for a given production. + NOTE: This method only returns results for files with replicas! + :param int prodid: production number - :return: the number of events + :return: list of tuples of the form (filetype, number of events, eventtype, number of input events) """ return self.dbR_.executeStoredProcedure("BOOKKEEPINGORACLEDB.getNumberOfEvents", [prodid]) diff --git a/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py b/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py index 3c83ce03e3..f90e51dd8e 100644 --- a/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py +++ b/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py @@ -9,7 +9,7 @@ # or submit itself to any jurisdiction. # ############################################################################### from DIRAC import gLogger -from DIRAC.Core.Utilities.ReturnValues import S_OK, DReturnType +from DIRAC.Core.Utilities.ReturnValues import DReturnType, returnValueOrRaise, convertToReturnValue class NewOracleBookkeepingDB: @@ -22,26 +22,31 @@ class NewOracleBookkeepingDB: """Retrieve all available file types from the database.""" return self.dbR_.executeStoredProcedure("BOOKKEEPINGORACLEDB.getAvailableFileTypes", []) - def getAvailableSMOG2States(self) -> DReturnType[list[str]]: + @convertToReturnValue + def getFileTypesForProdID(self, prodID: int) -> list[str]: + query_parts = [ + "SELECT DISTINCT filetypes.name", + "FROM files, jobs, filetypes", + "WHERE files.jobid = jobs.jobid AND jobs.production = :prodid AND filetypes.filetypeid = files.filetypeid", + ] + result = returnValueOrRaise(self.dbR_.query(" ".join(query_parts), kwparams={"prodid": prodID})) + return [ft for ft, in result] + + @convertToReturnValue + def getAvailableSMOG2States(self) -> list[str]: """Retrieve all available SMOG2 states.""" - retVal = self.dbR_.query("SELECT state FROM smog2") - if not retVal["OK"]: - return retVal - return S_OK([i[0] for i in retVal["Value"]]) + result = returnValueOrRaise(self.dbR_.query("SELECT state FROM smog2")) + return [state for state, in result] - def getRunsForSMOG2(self, state: str) -> DReturnType[list[int]]: + @convertToReturnValue + def getRunsForSMOG2(self, state: str) -> list[int]: """Retrieve all runs with specified SMOG2 state :param sr state: required state """ - retVal = self.dbR_.query( - "SELECT runs.runnumber FROM smog2" - " LEFT JOIN runs ON runs.smog2_id = smog2.id" - f" WHERE smog2.state = '{state}'" - ) - if not retVal["OK"]: - return retVal - return S_OK([int(i[0]) for i in retVal["Value"]]) + query = "SELECT runs.runnumber FROM smog2 LEFT JOIN runs ON runs.smog2_id = smog2.id WHERE smog2.state = :state" + result = returnValueOrRaise(self.dbR_.query(query, kwparams={"state": state})) + return [run for run, in result] def setSMOG2State(self, state: str, update: bool, runs: list[int]) -> DReturnType[None]: """Set SMOG2 state for runs. diff --git a/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py b/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py index b6e540cd63..0c24c21b8f 100644 --- a/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py +++ b/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py @@ -13,6 +13,7 @@ from DIRAC import S_OK, S_ERROR from DIRAC.Core.DISET.RequestHandler import RequestHandler from DIRAC.Core.Utilities.Decorators import deprecated +from DIRAC.Core.Utilities.ReturnValues import convertToReturnValue, returnValueOrRaise from DIRAC.ConfigurationSystem.Client.PathFinder import getServiceSection from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath from DIRAC.ConfigurationSystem.Client.Config import gConfig @@ -1254,7 +1255,11 @@ class BookkeepingManagerHandlerMixin: types_getProductionInformation = [int] def export_getProductionInformation(self, prodid): - """It returns statistics (data processing phases, number of events, etc.) for a given production""" + """It returns statistics (data processing phases, number of events, etc.) for a given production. + + NOTE: This method is EXTREMELY slow and should be used with caution. + NOTE: The returned paths are incorrect if we have no files GotReplica=True + """ nbjobs = None nbOfFiles = None @@ -1298,11 +1303,11 @@ class BookkeepingManagerHandlerMixin: path += res["Value"] prefix = "\n" + path - # FIXME: I think this will crash due to iterating over None if cls.bkkDB.getProductionNbOfEvents(prodid) fails. - # FIXME: I also have no idea what i is. At at glance I thought it was an integer but its being indexed? - # FIXME: Why only index 0 and 2? The docstring of getProductionNbOfEvents should probably be fixed. - for i in nbOfEvents: - path += prefix + "/" + str(i[2]) + "/" + i[0] + # At this point prefix is of the form '\n/ConfigName/ConfigVersion/SimulationConditions/ProcessingPass' + # We now iterate over nbOfEvents to get the paths containing the eventtype and filetype. + # Each path is separated by a newline character. + for filetype, _, eventtype, _ in nbOfEvents: + path += prefix + "/" + str(eventtype) + "/" + filetype result = { "Production information": prodinfos, "Number of jobs": nbjobs, @@ -1312,6 +1317,24 @@ class BookkeepingManagerHandlerMixin: } return S_OK(result) + ############################################################################# + types_getOutputPathsForProdID = [int] + + @convertToReturnValue + def export_getOutputPathsForProdID(self, prodID): + """It returns the file types for a given production.""" + + configs = returnValueOrRaise(self.bkkDB.getConfigsAndEvtType(prodID)) + conddesc = returnValueOrRaise(self.bkkDB.getProductionSimulationCond(prodID)) + procpass = returnValueOrRaise(self.bkkDB.getProductionProcessingPass(prodID)) + filetypes = returnValueOrRaise(self.bkkDB.getFileTypesForProdID(prodID)) + paths = [] + for config_name, config_version, eventtype in configs: + paths.extend( + f"/{config_name}/{config_version}/{conddesc}{procpass}/{eventtype}/{filetype}" for filetype in filetypes + ) + return paths + ############################################################################# types_getFileHistory = [str] -- GitLab