diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py index cb4708f2b64b38c89e463ec524ba61343e6fea37..45919bdd4c8e67f44bbe7fa01d89dcf284668011 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/AnalysisProductionsDB.py @@ -203,6 +203,7 @@ class AnalysisProductionsDB(DIRACDB): state=None, at_time=None, show_archived=False, + require_has_publication=False, session: Session, ): query = select( @@ -246,6 +247,10 @@ class AnalysisProductionsDB(DIRACDB): mque = select(query, pub_q.c.publications) mque = mque.join(pub_q, pub_q.c.sample_id == query.c.sample_id, isouter=True) + if require_has_publication: + # Return a sample only if it has a publication number assigned to it + mque = mque.filter(func.json_array_length(pub_q.c.publications) > 0) + results = [] for row in session.execute(mque).all(): result = { @@ -484,13 +489,40 @@ class AnalysisProductionsDB(DIRACDB): session.execute(query) @inject_session - def getPublications(self, sample_ids: list[int], *, session: Session): - numbers = defaultdict(set) - query = select(Publication.sample_id, Publication.number) + def getPublications(self, sample_ids: list[int] | None = None, *, session: Session): + numbers = defaultdict(list) + + ap_q = select( + AP.wg, + AP.analysis, + AP.sample_id, + AP.validity_start, + AP.validity_end, + AP.name, + AP.version, + AP.request_id, + AP.state, + ).subquery(name="samples") + + query = select(Publication.number, Publication.sample_id, ap_q) + query = query.join(AP, AP.sample_id == Publication.sample_id) + if sample_ids: query = query.filter(Publication.sample_id.in_(sample_ids)) - for id_, n in session.execute(query).all(): - numbers[n].add(id_) + for row in session.execute(query).all(): + numbers[row.number].append( + { + "sample_id": row.sample_id, + "request_id": row.request_id, + "wg": row.wg, + "analysis": row.analysis, + "name": row.name, + "version": row.version, + "state": row.state, + "validity_start": row.validity_start, + "validity_end": row.validity_end, + } + ) return numbers @inject_session diff --git a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py index 121e3c022ac15b01d98f45decf87f9259b957e85..4f9021f42ecf7d4ed735a7013531d1dd1ce69ceb 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/DB/tests/Test_AnalysisProductionsDB.py @@ -356,26 +356,60 @@ def test_addPublication(apdb): requests = apdb.listRequests() assert len(requests) == 3 + def result_has_pub(r, n, sample_id=None): + if n not in r: + return False + if sample_id: + if not any(i["sample_id"] == sample_id for i in r[n]): + return False + + return True + sample_ids = [1, 2, 3] + prods_with_pubs = apdb.getProductions(require_has_publication=True) + assert ( + len(prods_with_pubs) == 0 + ), "getProductions(require_has_publication=True) did not return empty, when it should have" + with pytest.raises(ValueError, match=r"This publication number is too long.*"): apdb.addPublication(sample_ids, "LHCb-" + "h" * 75) apdb.addPublication(sample_ids, "LHCb-ANA-YYYY-NNN") sample_publications = apdb.getPublications(sample_ids) - assert "LHCb-ANA-YYYY-NNN" in sample_publications.keys() - assert sample_ids[0] in sample_publications["LHCb-ANA-YYYY-NNN"] - assert sample_ids[1] in sample_publications["LHCb-ANA-YYYY-NNN"] - assert sample_ids[2] in sample_publications["LHCb-ANA-YYYY-NNN"] + assert result_has_pub(sample_publications, "LHCb-ANA-YYYY-NNN"), 'Result doesn\'t have "LHCb-ANA-YYYY-NNN" at all' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[0] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[0]}' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[1] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[1]}' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[2] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[2]}' with pytest.raises(IntegrityError): apdb.addPublication(sample_ids, "LHCb-ANA-YYYY-NNN") apdb.addPublication([sample_ids[0]], "LHCb-PAPER-YYYY-NNN") sample_publications = apdb.getPublications(sample_ids) - assert sample_ids[0] in sample_publications["LHCb-ANA-YYYY-NNN"] - assert sample_ids[0] in sample_publications["LHCb-PAPER-YYYY-NNN"] + + assert result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN" + ), 'Result doesn\'t have "LHCb-PAPER-YYYY-NNN" at all' + assert result_has_pub( + sample_publications, "LHCb-ANA-YYYY-NNN", sample_id=sample_ids[0] + ), f'Result doesn\'t have "LHCb-ANA-YYYY-NNN" for sample_id {sample_ids[0]}' + assert result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[0] + ), f'Result doesn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[0]}' + assert not result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[1] + ), f'Result shouldn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[1]}' + assert not result_has_pub( + sample_publications, "LHCb-PAPER-YYYY-NNN", sample_id=sample_ids[2] + ), f'Result shouldn\'t have "LHCb-PAPER-YYYY-NNN" for sample_id {sample_ids[2]}' prods_with_pubs = apdb.getProductions() @@ -388,6 +422,11 @@ def test_addPublication(apdb): assert "LHCb-PAPER-YYYY-NNN" not in prods_with_pubs[1]["publications"] assert "LHCb-PAPER-YYYY-NNN" not in prods_with_pubs[2]["publications"] + prods_with_pubs = apdb.getProductions(require_has_publication=True) + assert ( + len(prods_with_pubs) == 3 + ), "getProductions(require_has_publication=True) did not return the three requests with publications" + def test_get_and_delayHousekeepingInteractionDue(apdb): apdb.registerRequests([REQUEST_1, REQUEST_2, REQUEST_3]) diff --git a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py index 9b6931d09cb097e84de2fbadc815922aafa843e3..d3bb0bfb3e8db38f40ef6336c419cf1c7b23e946 100644 --- a/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py +++ b/src/LHCbDIRAC/ProductionManagementSystem/Service/TornadoAnalysisProductionsHandler.py @@ -91,6 +91,7 @@ class TornadoAnalysisProductionsHandler(TornadoService): with_transformations, at_time, show_archived, + require_has_publication, ): """See :meth:`~.AnalysisProductionsClient.getProductions`""" if (analysis or name or version) and wg is None: @@ -105,6 +106,7 @@ class TornadoAnalysisProductionsHandler(TornadoService): state=state, at_time=at_time, show_archived=show_archived, + require_has_publication=require_has_publication, ) return _queryToResults(results, with_lfns, with_pfns, with_transformations) @@ -269,8 +271,10 @@ def _queryToResults(results, with_lfns, with_pfns, with_transformations): columns=["TransformationID", "Status"], ) extraTransInfos = {t["TransformationID"]: t for t in returnValueOrRaise(retVal)} + inputQueries = returnValueOrRaise(tClient.getBookkeepingQueries(tIDs)) for tInfo in chain(*(r["transformations"] for r in results)): tInfo["status"] = extraTransInfos[tInfo["id"]]["Status"] + tInfo["input_query"] = inputQueries[tInfo["id"]] else: for result in results: del result["transformations"]