From 52a5b2b16e4e791ac134eeda9304e68caf0ad2a3 Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Wed, 16 Oct 2024 14:33:01 +0200
Subject: [PATCH 1/7] added require_has_publication to getProductions

Formatting and fix spelling
---
 .../ProductionManagementSystem/DB/AnalysisProductionsDB.py   | 5 +++++
 .../Service/TornadoAnalysisProductionsHandler.py             | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
index cb4708f2b6..460980712e 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
@@ -203,6 +203,7 @@ class AnalysisProductionsDB(DIRACDB):
         state=None,
         at_time=None,
         show_archived=False,
+        require_has_publication=False,
         session: Session,
     ):
         query = select(
@@ -246,6 +247,10 @@ class AnalysisProductionsDB(DIRACDB):
         mque = select(query, pub_q.c.publications)
         mque = mque.join(pub_q, pub_q.c.sample_id == query.c.sample_id, isouter=True)
 
+        if require_has_publication:
+            # Return a sample only if it has a publication number assigned to it
+            query = mque.filter(func.json_array_length(pub_q.c.publications) > 0)
+
         results = []
         for row in session.execute(mque).all():
             result = {
diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
index 9b6931d09c..e8b7d26a80 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
@@ -91,6 +91,7 @@ class TornadoAnalysisProductionsHandler(TornadoService):
         with_transformations,
         at_time,
         show_archived,
+        require_has_publication,
     ):
         """See :meth:`~.AnalysisProductionsClient.getProductions`"""
         if (analysis or name or version) and wg is None:
@@ -105,6 +106,7 @@ class TornadoAnalysisProductionsHandler(TornadoService):
             state=state,
             at_time=at_time,
             show_archived=show_archived,
+            require_has_publication=require_has_publication,
         )
         return _queryToResults(results, with_lfns, with_pfns, with_transformations)
 
-- 
GitLab


From 9129fe8f0c6b8f9195224fc85980fd7efd2450e3 Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Wed, 16 Oct 2024 17:18:29 +0200
Subject: [PATCH 2/7] Add input query to transformation info of getProductions.

---
 .../Service/TornadoAnalysisProductionsHandler.py                | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
index e8b7d26a80..d342d24dc1 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
@@ -273,6 +273,8 @@ def _queryToResults(results, with_lfns, with_pfns, with_transformations):
             extraTransInfos = {t["TransformationID"]: t for t in returnValueOrRaise(retVal)}
             for tInfo in chain(*(r["transformations"] for r in results)):
                 tInfo["status"] = extraTransInfos[tInfo["id"]]["Status"]
+                # while we're at it, add the input query too
+                tInfo["input_query"] = returnValueOrRaise(tClient.getBookkeepingQuery(tInfo["id"]))
     else:
         for result in results:
             del result["transformations"]
-- 
GitLab


From d07961ab13878aa0115e2c867c10c09a2f3f0beb Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Thu, 17 Oct 2024 08:34:42 +0200
Subject: [PATCH 3/7] Use getBookkeepingQueries instead

---
 .../Service/TornadoAnalysisProductionsHandler.py              | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
index d342d24dc1..d3bb0bfb3e 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py
@@ -271,10 +271,10 @@ def _queryToResults(results, with_lfns, with_pfns, with_transformations):
                 columns=["TransformationID", "Status"],
             )
             extraTransInfos = {t["TransformationID"]: t for t in returnValueOrRaise(retVal)}
+            inputQueries = returnValueOrRaise(tClient.getBookkeepingQueries(tIDs))
             for tInfo in chain(*(r["transformations"] for r in results)):
                 tInfo["status"] = extraTransInfos[tInfo["id"]]["Status"]
-                # while we're at it, add the input query too
-                tInfo["input_query"] = returnValueOrRaise(tClient.getBookkeepingQuery(tInfo["id"]))
+                tInfo["input_query"] = inputQueries[tInfo["id"]]
     else:
         for result in results:
             del result["transformations"]
-- 
GitLab


From 8a71ddb4c89f637d2f23bfd5f157430a4ece3b47 Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Thu, 17 Oct 2024 10:04:29 +0200
Subject: [PATCH 4/7] Add test and fix condition

---
 .../DB/AnalysisProductionsDB.py                        |  2 +-
 .../DB/tests/Test_AnalysisProductionsDB.py             | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
index 460980712e..2f11eb1b4f 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
@@ -249,7 +249,7 @@ class AnalysisProductionsDB(DIRACDB):
 
         if require_has_publication:
             # Return a sample only if it has a publication number assigned to it
-            query = mque.filter(func.json_array_length(pub_q.c.publications) > 0)
+            mque = mque.filter(func.json_array_length(pub_q.c.publications) > 0)
 
         results = []
         for row in session.execute(mque).all():
diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py
index 121e3c022a..a6bc978e5e 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py
@@ -358,6 +358,11 @@ def test_addPublication(apdb):
 
     sample_ids = [1, 2, 3]
 
+    prods_with_pubs = apdb.getProductions(require_has_publication=True)
+    assert (
+        len(prods_with_pubs) == 0
+    ), "getProductions(require_has_publication=True) did not return empty, when it should have"
+
     with pytest.raises(ValueError, match=r"This publication number is too long.*"):
         apdb.addPublication(sample_ids, "LHCb-" + "h" * 75)
 
@@ -388,6 +393,11 @@ def test_addPublication(apdb):
     assert "LHCb-PAPER-YYYY-NNN" not in prods_with_pubs[1]["publications"]
     assert "LHCb-PAPER-YYYY-NNN" not in prods_with_pubs[2]["publications"]
 
+    prods_with_pubs = apdb.getProductions(require_has_publication=True)
+    assert (
+        len(prods_with_pubs) == 3
+    ), "getProductions(require_has_publication=True) did not return the three requests with publications"
+
 
 def test_get_and_delayHousekeepingInteractionDue(apdb):
     apdb.registerRequests([REQUEST_1, REQUEST_2, REQUEST_3])
-- 
GitLab


From 9f54b3f575ab8688308be9714bf6376a178904cd Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Thu, 17 Oct 2024 10:30:56 +0200
Subject: [PATCH 5/7] Made getProductions more useful and adapted the test.

---
 .../DB/AnalysisProductionsDB.py               | 35 ++++++++++++++--
 .../DB/tests/Test_AnalysisProductionsDB.py    | 41 ++++++++++++++++---
 2 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
index 2f11eb1b4f..9f9331ff88 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
@@ -490,12 +490,39 @@ class AnalysisProductionsDB(DIRACDB):
 
     @inject_session
     def getPublications(self, sample_ids: list[int], *, session: Session):
-        numbers = defaultdict(set)
-        query = select(Publication.sample_id, Publication.number)
+        numbers = defaultdict(list)
+
+        ap_q = select(
+            AP.wg,
+            AP.analysis,
+            AP.sample_id,
+            AP.validity_start,
+            AP.validity_end,
+            AP.name,
+            AP.version,
+            AP.request_id,
+            AP.state,
+        ).subquery(name="samples")
+
+        query = select(Publication.number, Publication.sample_id, ap_q)
+        query = query.join(ap_q, ap_q.c.sample_id == Publication.sample_id)  # join ap_q itself: avoids a cross join
+
         if sample_ids:
             query = query.filter(Publication.sample_id.in_(sample_ids))
-        for id_, n in session.execute(query).all():
-            numbers[n].add(id_)
+        for row in session.execute(query).all():
+            numbers[row.number].append(
+                {
+                    "sample_id": row.sample_id,
+                    "request_id": row.request_id,
+                    "wg": row.wg,
+                    "analysis": row.analysis,
+                    "name": row.name,
+                    "version": row.version,
+                    "state": row.state,
+                    "validity_start": row.validity_start,
+                    "validity_end": row.validity_end,
+                }
+            )
         return numbers
 
     @inject_session
diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py
index a6bc978e5e..4f9021f42e 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py
@@ -356,6 +356,15 @@ def test_addPublication(apdb):
     requests = apdb.listRequests()
     assert len(requests) == 3
 
+    def result_has_pub(r, n, sample_id=None):
+        if n not in r:
+            return False
+        if sample_id:
+            if not any(i["sample_id"] == sample_id for i in r[n]):
+                return False
+
+        return True
+
     sample_ids = [1, 2, 3]
 
     prods_with_pubs = apdb.getProductions(require_has_publication=True)
@@ -369,18 +378,38 @@ def test_addPublication(apdb):
     apdb.addPublication(sample_ids, "LHCb-ANA-YYYY-NNN")
 
     sample_publications = apdb.getPublications(sample_ids)
-    assert "LHCb-ANA-YYYY-NNN" in sample_publications.keys()
-    assert sample_ids[0] in sample_publications["LHCb-ANA-YYYY-NNN"]
-    assert sample_ids[1] in sample_publications["LHCb-ANA-YYYY-NNN"]
-    assert sample_ids[2] in sample_publications["LHCb-ANA-YYYY-NNN"]
+    assert result_has_pub(sample_publications, "LHCb-ANA-YYYY-NNN"), 'Result doesn\'t have "LHCb-ANA-YYYY-NNN" at all'
+    assert result_has_pub(
+        sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[0]
+    ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[0]}'
+    assert result_has_pub(
+        sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[1]
+    ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[1]}'
+    assert result_has_pub(
+        sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[2]
+    ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[2]}'
 
     with pytest.raises(IntegrityError):
         apdb.addPublication(sample_ids, "LHCb-ANA-YYYY-NNN")
 
     apdb.addPublication([sample_ids[0]], "LHCb-PAPER-YYYY-NNN")
     sample_publications = apdb.getPublications(sample_ids)
-    assert sample_ids[0] in sample_publications["LHCb-ANA-YYYY-NNN"]
-    assert sample_ids[0] in sample_publications["LHCb-PAPER-YYYY-NNN"]
+
+    assert result_has_pub(
+        sample_publications, "LHCb-PAPER-YYYY-NNN"
+    ), 'Result doesn\'t have "LHCb-PAPER-YYYY-NNN" at all'
+    assert result_has_pub(
+        sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[0]
+    ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[0]}'
+    assert result_has_pub(
+        sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[0]
+    ), f'Result doesn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[0]}'
+    assert not result_has_pub(
+        sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[1]
+    ), f'Result shouldn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[1]}'
+    assert not result_has_pub(
+        sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[2]
+    ), f'Result shouldn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[2]}'
 
     prods_with_pubs = apdb.getProductions()
 
-- 
GitLab


From ef0cff16b938e72bbeac3d636ba73ae6db31c141 Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Thu, 17 Oct 2024 10:33:20 +0200
Subject: [PATCH 6/7] Fix sample_ids type

---
 .../ProductionManagementSystem/DB/AnalysisProductionsDB.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
index 9f9331ff88..0dadbff0a0 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
@@ -489,7 +489,7 @@ class AnalysisProductionsDB(DIRACDB):
         session.execute(query)
 
     @inject_session
-    def getPublications(self, sample_ids: list[int], *, session: Session):
+    def getPublications(self, sample_ids: list[int] | None, *, session: Session):
         numbers = defaultdict(list)
 
         ap_q = select(
-- 
GitLab


From d8bd6bd479a2660202f65f0b9c03182f4afca14b Mon Sep 17 00:00:00 2001
From: Ryunosuke O'Neil <r.oneil@cern.ch>
Date: Thu, 17 Oct 2024 10:34:16 +0200
Subject: [PATCH 7/7] make sample_ids optional

---
 .../ProductionManagementSystem/DB/AnalysisProductionsDB.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
index 0dadbff0a0..45919bdd4c 100644
--- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
+++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py
@@ -489,7 +489,7 @@ class AnalysisProductionsDB(DIRACDB):
         session.execute(query)
 
     @inject_session
-    def getPublications(self, sample_ids: list[int] | None, *, session: Session):
+    def getPublications(self, sample_ids: list[int] | None = None, *, session: Session):
         numbers = defaultdict(list)
 
         ap_q = select(
-- 
GitLab