From 52a5b2b16e4e791ac134eeda9304e68caf0ad2a3 Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Wed, 16 Oct 2024 14:33:01 +0200 Subject: [PATCH 1/7] added require_has_publication to getProductions Formatting and fix spelling --- .../ProductionManagementSystem/DB/AnalysisProductionsDB.py | 5 +++++ .../Service/TornadoAnalysisProductionsHandler.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py index cb4708f2b6..460980712e 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py @@ -203,6 +203,7 @@ class AnalysisProductionsDB(DIRACDB): state=None, at_time=None, show_archived=False, + require_has_publication=False, session: Session, ): query = select( @@ -246,6 +247,10 @@ class AnalysisProductionsDB(DIRACDB): mque = select(query, pub_q.c.publications) mque = mque.join(pub_q, pub_q.c.sample_id == query.c.sample_id, isouter=True) + if require_has_publication: + # Return a sample only if it has a publication number assigned to it + query = mque.filter(func.json_array_length(pub_q.c.publications) > 0) + results = [] for row in session.execute(mque).all(): result = { diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py index 9b6931d09c..e8b7d26a80 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py @@ -91,6 +91,7 @@ class TornadoAnalysisProductionsHandler(TornadoService): with_transformations, at_time, show_archived, + require_has_publication, ): """See :meth:`~.AnalysisProductionsClient.getProductions`""" if (analysis or name or version) and 
wg is None: @@ -105,6 +106,7 @@ class TornadoAnalysisProductionsHandler(TornadoService): state=state, at_time=at_time, show_archived=show_archived, + require_has_publication=require_has_publication, ) return _queryToResults(results, with_lfns, with_pfns, with_transformations) -- GitLab From 9129fe8f0c6b8f9195224fc85980fd7efd2450e3 Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Wed, 16 Oct 2024 17:18:29 +0200 Subject: [PATCH 2/7] Add input query to transformation info of getProductions. --- .../Service/TornadoAnalysisProductionsHandler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py index e8b7d26a80..d342d24dc1 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py @@ -273,6 +273,8 @@ def _queryToResults(results, with_lfns, with_pfns, with_transformations): extraTransInfos = {t["TransformationID"]: t for t in returnValueOrRaise(retVal)} for tInfo in chain(*(r["transformations"] for r in results)): tInfo["status"] = extraTransInfos[tInfo["id"]]["Status"] + # while we're at it, add the input query too + tInfo["input_query"] = returnValueOrRaise(tClient.getBookkeepingQuery(tInfo["id"])) else: for result in results: del result["transformations"] -- GitLab From d07961ab13878aa0115e2c867c10c09a2f3f0beb Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Thu, 17 Oct 2024 08:34:42 +0200 Subject: [PATCH 3/7] use getBookkeepingQueries instead --- .../Service/TornadoAnalysisProductionsHandler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py 
b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py index d342d24dc1..d3bb0bfb3e 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py @@ -271,10 +271,10 @@ def _queryToResults(results, with_lfns, with_pfns, with_transformations): columns=["TransformationID", "Status"], ) extraTransInfos = {t["TransformationID"]: t for t in returnValueOrRaise(retVal)} + inputQueries = returnValueOrRaise(tClient.getBookkeepingQueries(tIDs)) for tInfo in chain(*(r["transformations"] for r in results)): tInfo["status"] = extraTransInfos[tInfo["id"]]["Status"] - # while we're at it, add the input query too - tInfo["input_query"] = returnValueOrRaise(tClient.getBookkeepingQuery(tInfo["id"])) + tInfo["input_query"] = inputQueries[tInfo["id"]] else: for result in results: del result["transformations"] -- GitLab From 8a71ddb4c89f637d2f23bfd5f157430a4ece3b47 Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Thu, 17 Oct 2024 10:04:29 +0200 Subject: [PATCH 4/7] Add test and fix condition --- .../DB/AnalysisProductionsDB.py | 2 +- .../DB/tests/Test_AnalysisProductionsDB.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py index 460980712e..2f11eb1b4f 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py @@ -249,7 +249,7 @@ class AnalysisProductionsDB(DIRACDB): if require_has_publication: # Return a sample only if it has a publication number assigned to it - query = mque.filter(func.json_array_length(pub_q.c.publications) > 0) + mque = mque.filter(func.json_array_length(pub_q.c.publications) > 0) results = [] for row in 
session.execute(mque).all(): diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py index 121e3c022a..a6bc978e5e 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py @@ -358,6 +358,11 @@ def test_addPublication(apdb): sample_ids = [1, 2, 3] + prods_with_pubs = apdb.getProductions(require_has_publication=True) + assert ( + len(prods_with_pubs) == 0 + ), "getProductions(require_has_publication=True) did not return empty, when it should have" + with pytest.raises(ValueError, match=r"This publication number is too long.*"): apdb.addPublication(sample_ids, "LHCb-" + "h" * 75) @@ -388,6 +393,11 @@ def test_addPublication(apdb): assert "LHCb-PAPER-YYYY-NNN" not in prods_with_pubs[1]["publications"] assert "LHCb-PAPER-YYYY-NNN" not in prods_with_pubs[2]["publications"] + prods_with_pubs = apdb.getProductions(require_has_publication=True) + assert ( + len(prods_with_pubs) == 3 + ), "getProductions(require_has_publication=True) did not return the three requests with publications" + def test_get_and_delayHousekeepingInteractionDue(apdb): apdb.registerRequests([REQUEST_1, REQUEST_2, REQUEST_3]) -- GitLab From 9f54b3f575ab8688308be9714bf6376a178904cd Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Thu, 17 Oct 2024 10:30:56 +0200 Subject: [PATCH 5/7] Made getPublications more useful and adapted the test. 
--- .../DB/AnalysisProductionsDB.py | 35 ++++++++++++++-- .../DB/tests/Test_AnalysisProductionsDB.py | 41 ++++++++++++++++--- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py index 2f11eb1b4f..9f9331ff88 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py @@ -490,12 +490,39 @@ class AnalysisProductionsDB(DIRACDB): @inject_session def getPublications(self, sample_ids: list[int], *, session: Session): - numbers = defaultdict(set) - query = select(Publication.sample_id, Publication.number) + numbers = defaultdict(list) + + ap_q = select( + AP.wg, + AP.analysis, + AP.sample_id, + AP.validity_start, + AP.validity_end, + AP.name, + AP.version, + AP.request_id, + AP.state, + ).subquery(name="samples") + + query = select(Publication.number, Publication.sample_id, ap_q) + query = query.join(ap_q, ap_q.c.sample_id == Publication.sample_id) + if sample_ids: query = query.filter(Publication.sample_id.in_(sample_ids)) - for id_, n in session.execute(query).all(): - numbers[n].add(id_) + for row in session.execute(query).all(): + numbers[row.number].append( + { + "sample_id": row.sample_id, + "request_id": row.request_id, + "wg": row.wg, + "analysis": row.analysis, + "name": row.name, + "version": row.version, + "state": row.state, + "validity_start": row.validity_start, + "validity_end": row.validity_end, + } + ) return numbers @inject_session diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py index a6bc978e5e..4f9021f42e 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py @@ -356,6 
+356,15 @@ def test_addPublication(apdb): requests = apdb.listRequests() assert len(requests) == 3 + def result_has_pub(r, n, sample_id=None): + if n not in r: + return False + if sample_id: + if not any(i["sample_id"] == sample_id for i in r[n]): + return False + + return True + sample_ids = [1, 2, 3] prods_with_pubs = apdb.getProductions(require_has_publication=True) @@ -369,18 +378,38 @@ def test_addPublication(apdb): apdb.addPublication(sample_ids, "LHCb-ANA-YYYY-NNN") sample_publications = apdb.getPublications(sample_ids) - assert "LHCb-ANA-YYYY-NNN" in sample_publications.keys() - assert sample_ids[0] in sample_publications["LHCb-ANA-YYYY-NNN"] - assert sample_ids[1] in sample_publications["LHCb-ANA-YYYY-NNN"] - assert sample_ids[2] in sample_publications["LHCb-ANA-YYYY-NNN"] + assert result_has_pub(sample_publications, "LHCb-ANA-YYYY-NNN"), 'Result doesn\'t have "LHCb-ANA-YYYY-NNN" at all' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[0] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[0]}' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[1] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[1]}' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[2] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[2]}' with pytest.raises(IntegrityError): apdb.addPublication(sample_ids, "LHCb-ANA-YYYY-NNN") apdb.addPublication([sample_ids[0]], "LHCb-PAPER-YYYY-NNN") sample_publications = apdb.getPublications(sample_ids) - assert sample_ids[0] in sample_publications["LHCb-ANA-YYYY-NNN"] - assert sample_ids[0] in sample_publications["LHCb-PAPER-YYYY-NNN"] + + assert result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN" + ), 'Result doesn\'t have "LHCb-PAPER-YYYY-NNN" at all' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[0] + ), 
f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[0]}' + assert result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[0] + ), f'Result doesn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[0]}' + assert not result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[1] + ), f'Result shouldn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[1]}' + assert not result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[2] + ), f'Result shouldn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[2]}' prods_with_pubs = apdb.getProductions() -- GitLab From ef0cff16b938e72bbeac3d636ba73ae6db31c141 Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Thu, 17 Oct 2024 10:33:20 +0200 Subject: [PATCH 6/7] Fix sample_ids type --- .../ProductionManagementSystem/DB/AnalysisProductionsDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py index 9f9331ff88..0dadbff0a0 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py @@ -489,7 +489,7 @@ class AnalysisProductionsDB(DIRACDB): session.execute(query) @inject_session - def getPublications(self, sample_ids: list[int], *, session: Session): + def getPublications(self, sample_ids: list[int] | None, *, session: Session): numbers = defaultdict(list) ap_q = select( -- GitLab From d8bd6bd479a2660202f65f0b9c03182f4afca14b Mon Sep 17 00:00:00 2001 From: Ryunosuke O'Neil <r.oneil@cern.ch> Date: Thu, 17 Oct 2024 10:34:16 +0200 Subject: [PATCH 7/7] make sample_ids optional --- .../ProductionManagementSystem/DB/AnalysisProductionsDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py index 0dadbff0a0..45919bdd4c 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py @@ -489,7 +489,7 @@ class AnalysisProductionsDB(DIRACDB): session.execute(query) @inject_session - def getPublications(self, sample_ids: list[int] | None, *, session: Session): + def getPublications(self, sample_ids: list[int] | None = None, *, session: Session): numbers = defaultdict(list) ap_q = select( -- GitLab