From 11e8a420c04819587b98009c700329a86825b9a9 Mon Sep 17 00:00:00 2001
From: Federico Stagni <federico.stagni@cern.ch>
Date: Fri, 5 Apr 2024 15:06:18 +0200
Subject: [PATCH] Merge branch 'prod-id-to-path' into 'master'

[master] Add BookkeepingClient.getOutputPathsForProdID

See merge request lhcb-dirac/LHCbDIRAC!1543

(cherry picked from commit d964d21ac39ea901ffa126707ebc88e3805835fe)

82a0c617 doc: Clarify export_getProductionInformation
92b15705 refactor: Modernise NewOracleBookkeepingDB
31bd5c47 feat: Add BookkeepingClient.getOutputPathsForProdID
---
 .../DB/LegacyOracleBookkeepingDB.py           |  4 ++-
 .../DB/NewOracleBookkeepingDB.py              | 35 +++++++++++--------
 .../Service/BookkeepingManagerHandler.py      | 35 +++++++++++++++----
 3 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py b/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py
index df37bce6fc..3ce021541c 100644
--- a/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py
+++ b/src/LHCbDIRAC/BookkeepingSystem/DB/LegacyOracleBookkeepingDB.py
@@ -3275,8 +3275,10 @@ AND files.qualityid= dataquality.qualityid"
     def getProductionNbOfEvents(self, prodid):
         """Number of event for a given production.
 
+        NOTE: This method only returns results for files with replicas!
+
         :param int prodid: production number
-        :return: the number of events
+        :return: list of tuples of the form (filetype, number of events, eventtype, number of input events)
         """
         return self.dbR_.executeStoredProcedure("BOOKKEEPINGORACLEDB.getNumberOfEvents", [prodid])
 
diff --git a/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py b/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py
index 3c83ce03e3..f90e51dd8e 100644
--- a/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py
+++ b/src/LHCbDIRAC/BookkeepingSystem/DB/NewOracleBookkeepingDB.py
@@ -9,7 +9,7 @@
 # or submit itself to any jurisdiction.                                       #
 ###############################################################################
 from DIRAC import gLogger
-from DIRAC.Core.Utilities.ReturnValues import S_OK, DReturnType
+from DIRAC.Core.Utilities.ReturnValues import DReturnType, returnValueOrRaise, convertToReturnValue
 
 
 class NewOracleBookkeepingDB:
@@ -22,26 +22,31 @@ class NewOracleBookkeepingDB:
         """Retrieve all available file types from the database."""
         return self.dbR_.executeStoredProcedure("BOOKKEEPINGORACLEDB.getAvailableFileTypes", [])
 
-    def getAvailableSMOG2States(self) -> DReturnType[list[str]]:
+    @convertToReturnValue
+    def getFileTypesForProdID(self, prodID: int) -> list[str]:
+        query_parts = [
+            "SELECT DISTINCT filetypes.name",
+            "FROM files, jobs, filetypes",
+            "WHERE files.jobid = jobs.jobid AND jobs.production = :prodid AND filetypes.filetypeid = files.filetypeid",
+        ]
+        result = returnValueOrRaise(self.dbR_.query(" ".join(query_parts), kwparams={"prodid": prodID}))
+        return [ft for ft, in result]
+
+    @convertToReturnValue
+    def getAvailableSMOG2States(self) -> list[str]:
         """Retrieve all available SMOG2 states."""
-        retVal = self.dbR_.query("SELECT state FROM smog2")
-        if not retVal["OK"]:
-            return retVal
-        return S_OK([i[0] for i in retVal["Value"]])
+        result = returnValueOrRaise(self.dbR_.query("SELECT state FROM smog2"))
+        return [state for state, in result]
 
-    def getRunsForSMOG2(self, state: str) -> DReturnType[list[int]]:
+    @convertToReturnValue
+    def getRunsForSMOG2(self, state: str) -> list[int]:
         """Retrieve all runs with specified SMOG2 state
 
         :param sr state: required state
         """
-        retVal = self.dbR_.query(
-            "SELECT runs.runnumber FROM smog2"
-            " LEFT JOIN runs ON runs.smog2_id = smog2.id"
-            f" WHERE smog2.state  = '{state}'"
-        )
-        if not retVal["OK"]:
-            return retVal
-        return S_OK([int(i[0]) for i in retVal["Value"]])
+        query = "SELECT runs.runnumber FROM smog2 LEFT JOIN runs ON runs.smog2_id = smog2.id WHERE smog2.state = :state"
+        result = returnValueOrRaise(self.dbR_.query(query, kwparams={"state": state}))
+        return [run for run, in result]
 
     def setSMOG2State(self, state: str, update: bool, runs: list[int]) -> DReturnType[None]:
         """Set SMOG2 state for runs.
diff --git a/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py b/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py
index b6e540cd63..0c24c21b8f 100644
--- a/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py
+++ b/src/LHCbDIRAC/BookkeepingSystem/Service/BookkeepingManagerHandler.py
@@ -13,6 +13,7 @@
 from DIRAC import S_OK, S_ERROR
 from DIRAC.Core.DISET.RequestHandler import RequestHandler
 from DIRAC.Core.Utilities.Decorators import deprecated
+from DIRAC.Core.Utilities.ReturnValues import convertToReturnValue, returnValueOrRaise
 from DIRAC.ConfigurationSystem.Client.PathFinder import getServiceSection
 from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath
 from DIRAC.ConfigurationSystem.Client.Config import gConfig
@@ -1254,7 +1255,11 @@ class BookkeepingManagerHandlerMixin:
     types_getProductionInformation = [int]
 
     def export_getProductionInformation(self, prodid):
-        """It returns statistics (data processing phases, number of events, etc.) for a given production"""
+        """It returns statistics (data processing phases, number of events, etc.) for a given production.
+
+        NOTE: This method is EXTREMELY slow and should be used with caution.
+        NOTE: The returned paths are incorrect if we have no files GotReplica=True
+        """
 
         nbjobs = None
         nbOfFiles = None
@@ -1298,11 +1303,11 @@ class BookkeepingManagerHandlerMixin:
         path += res["Value"]
         prefix = "\n" + path
 
-        # FIXME: I think this will crash due to iterating over None if cls.bkkDB.getProductionNbOfEvents(prodid) fails.
-        # FIXME: I also have no idea what i is. At at glance I thought it was an integer but its being indexed?
-        # FIXME: Why only index 0 and 2? The docstring of getProductionNbOfEvents should probably be fixed.
-        for i in nbOfEvents:
-            path += prefix + "/" + str(i[2]) + "/" + i[0]
+        # At this point prefix is of the form '\n/ConfigName/ConfigVersion/SimulationConditions/ProcessingPass'
+        # We now iterate over nbOfEvents to get the paths containing the eventtype and filetype.
+        # Each path is separated by a newline character.
+        for filetype, _, eventtype, _ in nbOfEvents:
+            path += prefix + "/" + str(eventtype) + "/" + filetype
         result = {
             "Production information": prodinfos,
             "Number of jobs": nbjobs,
@@ -1312,6 +1317,24 @@ class BookkeepingManagerHandlerMixin:
         }
         return S_OK(result)
 
+    #############################################################################
+    types_getOutputPathsForProdID = [int]
+
+    @convertToReturnValue
+    def export_getOutputPathsForProdID(self, prodID):
+        """It returns the file types for a given production."""
+
+        configs = returnValueOrRaise(self.bkkDB.getConfigsAndEvtType(prodID))
+        conddesc = returnValueOrRaise(self.bkkDB.getProductionSimulationCond(prodID))
+        procpass = returnValueOrRaise(self.bkkDB.getProductionProcessingPass(prodID))
+        filetypes = returnValueOrRaise(self.bkkDB.getFileTypesForProdID(prodID))
+        paths = []
+        for config_name, config_version, eventtype in configs:
+            paths.extend(
+                f"/{config_name}/{config_version}/{conddesc}{procpass}/{eventtype}/{filetype}" for filetype in filetypes
+            )
+        return paths
+
     #############################################################################
     types_getFileHistory = [str]
 
-- 
GitLab