From 1ccff5ee5ad3d19d000a342e35946117d828237b Mon Sep 17 00:00:00 2001 From: Konstantina Skovola <konstantina.skovola@cern.ch> Date: Wed, 12 Mar 2025 17:14:17 +0000 Subject: [PATCH 1/5] Fix gRPC Frontend crash when multiple Retrieve requests are submitted simultaneously Closes #1110 See merge request cta/CTA!838 Changelog: feature/bug/maintenance Issue: <id> --- frontend/grpc/FrontendGrpcService.cpp | 63 ++++++++++++++++----------- frontend/grpc/FrontendGrpcService.h | 15 +++---- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/frontend/grpc/FrontendGrpcService.cpp b/frontend/grpc/FrontendGrpcService.cpp index 035a17aa4f..7803784034 100644 --- a/frontend/grpc/FrontendGrpcService.cpp +++ b/frontend/grpc/FrontendGrpcService.cpp @@ -31,7 +31,8 @@ namespace cta::frontend::grpc { Status -CtaRpcImpl::GenericRequest(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) { +CtaRpcImpl::ProcessGrpcRequest(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response, cta::log::LogContext &lc) { + try { cta::eos::Client client = request->notification().cli(); cta::common::dataStructures::SecurityIdentity clientIdentity(client.sec().name(), cta::utils::getShortHostname(), @@ -39,43 +40,53 @@ CtaRpcImpl::GenericRequest(::grpc::ServerContext* context, const cta::xrd::Reque cta::frontend::WorkflowEvent wfe(*m_frontendService, clientIdentity, request->notification()); *response = wfe.process(); } catch (cta::exception::PbException &ex) { - m_lc.log(cta::log::ERR, ex.getMessageValue()); + lc.log(cta::log::ERR, ex.getMessageValue()); response->set_type(cta::xrd::Response::RSP_ERR_PROTOBUF); response->set_message_txt(ex.getMessageValue()); return ::grpc::Status(::grpc::StatusCode::INTERNAL, ex.getMessageValue()); } catch (cta::exception::UserError &ex) { - m_lc.log(cta::log::ERR, ex.getMessageValue()); + lc.log(cta::log::ERR, ex.getMessageValue()); response->set_type(cta::xrd::Response::RSP_ERR_USER); response->set_message_txt(ex.getMessageValue()); return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, ex.getMessageValue()); } catch (cta::exception::Exception &ex) { - m_lc.log(cta::log::ERR, ex.getMessageValue()); + lc.log(cta::log::ERR, ex.getMessageValue()); response->set_type(cta::xrd::Response::RSP_ERR_CTA); response->set_message_txt(ex.getMessageValue()); return ::grpc::Status(::grpc::StatusCode::UNKNOWN, ex.getMessageValue()); } catch (std::runtime_error &ex) { - m_lc.log(cta::log::ERR, ex.what()); + lc.log(cta::log::ERR, ex.what()); response->set_type(cta::xrd::Response::RSP_ERR_CTA); response->set_message_txt(ex.what()); return ::grpc::Status(::grpc::StatusCode::UNKNOWN, ex.what()); } catch (...) { response->set_type(cta::xrd::Response::RSP_ERR_CTA); - return ::grpc::Status(::grpc::StatusCode::UNKNOWN, "Error processing gRPC GenericRequest"); + return ::grpc::Status(::grpc::StatusCode::UNKNOWN, "Error processing gRPC request"); } return Status::OK; } Status -CtaRpcImpl::Create(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept { +CtaRpcImpl::Create(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) { + cta::log::LogContext lc(m_frontendService->getLogContext()); + cta::log::ScopedParamContainer sp(lc); + + sp.add("remoteHost", context->peer()); + sp.add("request", "create"); // check that the workflow is set appropriately for the create event auto event = request->notification().wf().event(); if (event != cta::eos::Workflow::CREATE) return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Unexpected workflow event type. Expected CREATE, found " + cta::eos::Workflow_EventType_Name(event)); - return GenericRequest(context, request, response); + return ProcessGrpcRequest(context, request, response, lc); } Status -CtaRpcImpl::Archive(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept { +CtaRpcImpl::Archive(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) { + cta::log::LogContext lc(m_frontendService->getLogContext()); + cta::log::ScopedParamContainer sp(lc); + + sp.add("remoteHost", context->peer()); + sp.add("request", "archive"); // check validate request args const std::string storageClass = request->notification().file().storage_class(); if (storageClass.empty()) { @@ -86,12 +97,13 @@ CtaRpcImpl::Archive(::grpc::ServerContext* context, const cta::xrd::Request* req if (event != cta::eos::Workflow::CLOSEW) return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Unexpected workflow event type. Expected CLOSEW, found " + cta::eos::Workflow_EventType_Name(event)); - return GenericRequest(context, request, response); + return ProcessGrpcRequest(context, request, response, lc); } Status -CtaRpcImpl::Delete(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept { - cta::log::ScopedParamContainer sp(m_lc); +CtaRpcImpl::Delete(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) { + cta::log::LogContext lc(m_frontendService->getLogContext()); + cta::log::ScopedParamContainer sp(lc); sp.add("remoteHost", context->peer()); sp.add("request", "delete"); @@ -102,17 +114,18 @@ CtaRpcImpl::Delete(::grpc::ServerContext* context, const cta::xrd::Request* requ return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Unexpected workflow event type. Expected DELETE, found " + cta::eos::Workflow_EventType_Name(event)); if (request->notification().file().archive_file_id() == 0) { - m_lc.log(cta::log::WARNING, "Invalid archive file id"); + lc.log(cta::log::WARNING, "Invalid archive file id"); return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Invalid archive file id."); } // done with validation, now do the workflow processing - return GenericRequest(context, request, response); + return ProcessGrpcRequest(context, request, response, lc); } Status -CtaRpcImpl::Retrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept { - cta::log::ScopedParamContainer sp(m_lc); +CtaRpcImpl::Retrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) { + cta::log::LogContext lc(m_frontendService->getLogContext()); + cta::log::ScopedParamContainer sp(lc); sp.add("remoteHost", context->peer()); sp.add("request", "retrieve"); @@ -128,7 +141,7 @@ CtaRpcImpl::Retrieve(::grpc::ServerContext* context, const cta::xrd::Request* re // check validate request args if (request->notification().file().archive_file_id() == 0) { - m_lc.log(cta::log::WARNING, "Invalid archive file id"); + lc.log(cta::log::WARNING, "Invalid archive file id"); return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Invalid archive file id."); } @@ -141,17 +154,18 @@ CtaRpcImpl::Retrieve(::grpc::ServerContext* context, const cta::xrd::Request* re sp.add("archiveID", request->notification().file().archive_file_id()); sp.add("fileID", request->notification().file().disk_file_id()); - return GenericRequest(context, request, response); + return ProcessGrpcRequest(context, request, response, lc); } Status CtaRpcImpl::CancelRetrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, - cta::xrd::Response* response) noexcept { - cta::log::ScopedParamContainer sp(m_lc); + cta::xrd::Response* response) { + cta::log::LogContext lc(m_frontendService->getLogContext()); + cta::log::ScopedParamContainer sp(lc); sp.add("remoteHost", context->peer()); - m_lc.log(cta::log::DEBUG, "CancelRetrieve request"); + lc.log(cta::log::DEBUG, "CancelRetrieve request"); sp.add("request", "cancel"); // check validate request args @@ -160,7 +174,7 @@ Status CtaRpcImpl::CancelRetrieve(::grpc::ServerContext* context, return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Unexpected workflow event type. Expected ABORT_PREPARE, found " + cta::eos::Workflow_EventType_Name(event)); if (!request->notification().file().archive_file_id()) { - m_lc.log(cta::log::WARNING, "Invalid archive file id"); + lc.log(cta::log::WARNING, "Invalid archive file id"); return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Invalid archive file id."); } @@ -173,7 +187,7 @@ Status CtaRpcImpl::CancelRetrieve(::grpc::ServerContext* context, sp.add("schedulerJobID", request->notification().file().archive_file_id()); // field verification done, now try to call the process method - return GenericRequest(context, request, response); + return ProcessGrpcRequest(context, request, response, lc); } /* initialize the frontend service @@ -181,7 +195,6 @@ Status CtaRpcImpl::CancelRetrieve(::grpc::ServerContext* context, * and makes the rpc calls available through this class */ CtaRpcImpl::CtaRpcImpl(const std::string& config) - : m_frontendService(std::make_unique<cta::frontend::FrontendService>(config)) - , m_lc(m_frontendService->getLogContext()) {} + : m_frontendService(std::make_unique<cta::frontend::FrontendService>(config)) {} } // namespace cta::frontend::grpc \ No newline at end of file diff --git a/frontend/grpc/FrontendGrpcService.h b/frontend/grpc/FrontendGrpcService.h index 027df5ec59..964c25c43a 100644 --- a/frontend/grpc/FrontendGrpcService.h +++ b/frontend/grpc/FrontendGrpcService.h @@ -28,20 +28,19 @@ class CtaRpcImpl : public CtaRpc::Service { private: std::unique_ptr<cta::frontend::FrontendService> m_frontendService; - log::LogContext m_lc; public: CtaRpcImpl(const std::string& config); FrontendService& getFrontendService() const { return *m_frontendService; } - log::LogContext getLogContext() const { return m_lc; } // Archive/Retrieve interface - Status Create(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept; - Status Archive(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept; - Status Retrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept; - Status CancelRetrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept; - Status Delete(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response) noexcept; - Status GenericRequest(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response); + Status Create(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response); + Status Archive(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response); + Status Retrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response); + Status CancelRetrieve(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response); + Status Delete(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response); +private: + Status ProcessGrpcRequest(::grpc::ServerContext* context, const cta::xrd::Request* request, cta::xrd::Response* response, cta::log::LogContext &lc); }; } // namespace cta::frontend::grpc -- GitLab From 275f47c33e21caac02cd031a56355b8871c38b88 Mon Sep 17 00:00:00 2001 From: Joao Afonso <joao.afonso@cern.ch> Date: Thu, 13 Mar 2025 12:35:07 +0100 Subject: [PATCH 2/5] Setting Oracle version 21 in versionlock file --- .../ctafrontend/alma9/etc/yum/pluginconf.d/versionlock.list | 2 ++ 1 file changed, 2 insertions(+) diff --git a/continuousintegration/docker/ctafrontend/alma9/etc/yum/pluginconf.d/versionlock.list b/continuousintegration/docker/ctafrontend/alma9/etc/yum/pluginconf.d/versionlock.list index 5bf5f2fd62..d86622fb36 100644 --- a/continuousintegration/docker/ctafrontend/alma9/etc/yum/pluginconf.d/versionlock.list +++ b/continuousintegration/docker/ctafrontend/alma9/etc/yum/pluginconf.d/versionlock.list @@ -31,3 +31,5 @@ xrootd-server-1:5.6.1-1.el9.* xrootd-server-devel-1:5.6.1-1.el9.* xrootd-server-libs-1:5.6.1-1.el9.* xrootd-tests-1:5.6.1-1.el9.* +oracle-instantclient-basic-21.12.0.0.0-1.* +oracle-instantclient-devel-21.12.0.0.0-1.* -- GitLab From e1b9fcd8c71d9bb33158d0a72cbff572c9c764ac Mon Sep 17 00:00:00 2001 From: Joao Afonso <joao.afonso@cern.ch> Date: Thu, 13 Mar 2025 17:17:48 +0100 Subject: [PATCH 3/5] Fixing DOCKER_IMAGE_BUILDER_IMAGE --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index afd3e54168..7fbf677bba 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -30,7 +30,7 @@ variables: # Images EL9_IMAGE: gitlab-registry.cern.ch/linuxsupport/alma9-base PYTHON3_IMAGE: python:3-alpine - DOCKER_IMAGE_BUILDER_IMAGE: gitlab-registry.cern.ch/ci-tools/docker-image-builder + DOCKER_IMAGE_BUILDER_IMAGE: gcr.io/kaniko-project/executor:debug CPPCHECK_IMAGE: gitlab-registry.cern.ch/cta/eoscta-operations/registry/container_registry/cta-cppcheck:2.16.0 GITLAB_RELEASE_CLI_IMAGE: registry.gitlab.com/gitlab-org/release-cli:latest -- GitLab From 5728d98d74d9a136874b8c16210d64bd476b6602 Mon Sep 17 00:00:00 2001 From: Joao Afonso <joao.afonso@cern.ch> Date: Thu, 13 Mar 2025 17:37:12 +0100 Subject: [PATCH 4/5] Fixing postgres unit-test --- .gitlab/ci/tests.gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab/ci/tests.gitlab-ci.yml b/.gitlab/ci/tests.gitlab-ci.yml index 113cd71180..b839ddf187 100644 --- a/.gitlab/ci/tests.gitlab-ci.yml +++ b/.gitlab/ci/tests.gitlab-ci.yml @@ -85,6 +85,8 @@ unit-test-postgresql: - chown -R postgres:postgres ${POSTGRESQL_DATA_DIR} - mkdir -p ${POSTGRESQL_LOG_DIR} - chown -R postgres:postgres ${POSTGRESQL_LOG_DIR} + - mkdir /var/run/postgresql + - chown -R postgres:postgres /var/run/postgresql/ - runuser -u postgres -- initdb -D ${POSTGRESQL_DATA_DIR} - runuser -u postgres -- pg_ctl start -w -t 10 -D ${POSTGRESQL_DATA_DIR} -l ${POSTGRESQL_LOG_DIR}/cta_test_postgres.log - runuser -u postgres -- createdb cta -- GitLab From 9c99ec9a3d6d910903cc536357734cf561b7b714 Mon Sep 17 00:00:00 2001 From: Joao Afonso <joao.afonso@cern.ch> Date: Thu, 13 Mar 2025 10:30:18 +0100 Subject: [PATCH 5/5] Changelog for v5.11.2.1-1 --- CHANGELOG.md | 6 ++++++ cta.spec.in | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9124fa84b9..0033e4b662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v5.11.2.1-1 + +### Bug Fixes + +- cta/#1110 - Fix gRPC Frontend crash when multiple Retrieve requests are submitted simultaneously + ## v5.11.2.0-1 ### Features diff --git a/cta.spec.in b/cta.spec.in index 5d9e81e42f..af86dbda60 100644 --- a/cta.spec.in +++ b/cta.spec.in @@ -625,6 +625,10 @@ This package contains .repo files, gpg keys and yum-versionlock configuration fo %changelog +* Thu Mar 13 2025 Joao Afonso <joao.afonso@cern.ch> - 5.11.2.1-1 +- Fixed gRPC Frontend crash when multiple Retrieve requests are submitted simultaneously +- See CHANGELOG.md for details + * Tue Dec 10 2024 Joao Afonso <joao.afonso@cern.ch> - 5.11.2.0-1 - See CHANGELOG.md for details -- GitLab