From 09c7d39256c771fd1ae67cd264a2ac115e4209b3 Mon Sep 17 00:00:00 2001 From: Mihai Patrascoiu <mihai.patrascoiu@cern.ch> Date: Fri, 21 Jul 2023 17:12:00 +0200 Subject: [PATCH 1/3] FTS-1920, FTS-1940: Schema for transfer retry log file --- src/db/schema/mysql/fts-diff-8.1.0.sql | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/db/schema/mysql/fts-diff-8.1.0.sql b/src/db/schema/mysql/fts-diff-8.1.0.sql index 3b45687c6..1ed4d9b34 100644 --- a/src/db/schema/mysql/fts-diff-8.1.0.sql +++ b/src/db/schema/mysql/fts-diff-8.1.0.sql @@ -1,10 +1,15 @@ -- -- FTS3 Schema 8.1.0 --- [FTS-1914] Schema changes for TPC-support configuration per SE +-- [FTS-1914] TPC-support configuration per SE +-- [FTS-1920] Retry log file -- ALTER TABLE `t_se` ADD COLUMN `tpc_support` VARCHAR(10) DEFAULT NULL; +ALTER TABLE `t_file_retry_errors` + ADD COLUMN `transfer_host` varchar(255) DEFAULT NULL, + ADD COLUMN `log_file` varchar(2048) DEFAULT NULL; + INSERT INTO t_schema_vers (major, minor, patch, message) -VALUES (8, 1, 0, 'FTS-1914: Schema changes for TCP-support configuration per SE'); +VALUES (8, 1, 0, 'FTS-1914: TPC-support configuration per SE'); -- GitLab From f3fb23f66fe445ed76612c190802eeaaf3db94c2 Mon Sep 17 00:00:00 2001 From: Mihai Patrascoiu <mihai.patrascoiu@cern.ch> Date: Fri, 21 Jul 2023 17:43:28 +0200 Subject: [PATCH 2/3] FTS-1920: Propagate and store the transfer host and logfile path for transfer retries --- src/db/generic/GenericDbIfce.h | 6 ++++-- src/db/mysql/MySqlAPI.cpp | 15 ++++++++------- src/db/mysql/MySqlAPI.h | 6 ++++-- src/msg-bus/events/Message.proto | 2 ++ .../transfers/MessageProcessingService.cpp | 3 ++- src/url-copy/LegacyReporter.cpp | 2 ++ 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/db/generic/GenericDbIfce.h b/src/db/generic/GenericDbIfce.h index 053aaf477..ca1a07e6c 100644 --- a/src/db/generic/GenericDbIfce.h +++ b/src/db/generic/GenericDbIfce.h @@ -223,10 +223,12 @@ public: /// Add a new retry to the transfer identified by fileId /// @param jobId Job identifier /// @param fileId Transfer identifier + /// @param retryNo The retry attempt number /// @param reason String representation of the failure /// @param errcode An integer representing the failure - virtual void setRetryTransfer(const std::string & jobId, uint64_t fileId, int retry, const std::string& reason, - int errcode) = 0; + /// @param logFile The log file path + virtual void setRetryTransfer(const std::string& jobId, uint64_t fileId, int retryNo, + const std::string& reason, const std::string& logFile, int errcode) = 0; /// Bulk update of transfer progress virtual void updateFileTransferProgressVector(const std::vector<fts3::events::MessageUpdater> &messages) = 0; diff --git a/src/db/mysql/MySqlAPI.cpp b/src/db/mysql/MySqlAPI.cpp index 339f711d3..3966a0cfb 100644 --- a/src/db/mysql/MySqlAPI.cpp +++ b/src/db/mysql/MySqlAPI.cpp @@ -2697,8 +2697,8 @@ std::vector<TransferState> MySqlAPI::getStateOfTransfer(const std::string& jobId } -void MySqlAPI::setRetryTransfer(const std::string &jobId, uint64_t fileId, int retry, - const std::string &reason, int errcode) +void MySqlAPI::setRetryTransfer(const std::string& jobId, uint64_t fileId, int retryNo, + const std::string& reason, const std::string& logFile, int errcode) { soci::session sql(*connectionPool); @@ -2760,7 +2760,7 @@ void MySqlAPI::setRetryTransfer(const std::string &jobId, uint64_t fileId, int r sql << "update t_file set retry = :retry, current_failures = 0, file_state='STAGING', " "internal_file_params=NULL, transfer_host=NULL,start_time=NULL, pid=NULL, " " filesize=0, staging_start=NULL, staging_finished=NULL where file_id=:file_id and job_id=:job_id AND file_state NOT IN ('FINISHED','STAGING','SUBMITTED','FAILED','CANCELED') ", - soci::use(retry), + soci::use(retryNo), soci::use(fileId), soci::use(jobId); } @@ -2770,15 +2770,16 @@ void MySqlAPI::setRetryTransfer(const std::string &jobId, uint64_t fileId, int r "transfer_host=NULL, log_file=NULL," " log_file_debug=NULL, throughput = 0, current_failures = 1 " " WHERE file_id = :fileId AND job_id = :jobId AND file_state NOT IN ('FINISHED','SUBMITTED','FAILED','CANCELED')", - soci::use(tTime), soci::use(retry), soci::use(fileId), soci::use(jobId); + soci::use(tTime), soci::use(retryNo), soci::use(fileId), soci::use(jobId); } // Keep log sql << "INSERT IGNORE INTO t_file_retry_errors " - " (file_id, attempt, datetime, reason) " - "VALUES (:fileId, :attempt, UTC_TIMESTAMP(), :reason)", - soci::use(fileId), soci::use(retry), soci::use(reason); + " (file_id, attempt, datetime, reason, transfer_host, log_file) " + "VALUES (:fileId, :retryNo, UTC_TIMESTAMP(), :reason, :hostname, :logFile)", + soci::use(fileId), soci::use(retryNo), + soci::use(reason), soci::use(hostname), soci::use(logFile); sql.commit(); } diff --git a/src/db/mysql/MySqlAPI.h b/src/db/mysql/MySqlAPI.h index 37111a99a..c6808c77e 100644 --- a/src/db/mysql/MySqlAPI.h +++ b/src/db/mysql/MySqlAPI.h @@ -181,10 +181,12 @@ public: /// Add a new retry to the transfer identified by fileId /// @param jobId Job identifier /// @param fileId Transfer identifier + /// @param retryNo The retry attempt number /// @param reason String representation of the failure /// @param errcode An integer representing the failure - virtual void setRetryTransfer(const std::string & jobId, uint64_t fileId, int retry, const std::string& reason, - int errcode); + /// @param logFile The log file path + virtual void setRetryTransfer(const std::string& jobId, uint64_t fileId, int retryNo, + const std::string& reason, const std::string& logFile, int errcode); /// Bulk update of transfer progress virtual void updateFileTransferProgressVector(const std::vector<fts3::events::MessageUpdater> &messages); diff --git a/src/msg-bus/events/Message.proto b/src/msg-bus/events/Message.proto index 24c3d87ed..b3e0e2509 100644 --- a/src/msg-bus/events/Message.proto +++ b/src/msg-bus/events/Message.proto @@ -27,4 +27,6 @@ message Message { optional double instantaneous_throughput = 18; optional uint64 gfal_perf_timestamp = 19; optional uint64 transferred_since_last_ping = 20; + + optional string log_path = 21; } diff --git a/src/server/services/transfers/MessageProcessingService.cpp b/src/server/services/transfers/MessageProcessingService.cpp index 389260f2f..50922f44b 100644 --- a/src/server/services/transfers/MessageProcessingService.cpp +++ b/src/server/services/transfers/MessageProcessingService.cpp @@ -247,7 +247,8 @@ void MessageProcessingService::performOtherMessageDbChange(const fts3::events::M if (retryTimes <= retry - 1) { db::DBSingleton::instance().getDBObjectInstance()->setRetryTransfer( - msg.job_id(), msg.file_id(), retryTimes+1, msg.transfer_message(), msg.errcode()); + msg.job_id(), msg.file_id(), retryTimes + 1, + msg.transfer_message(), msg.log_path(), msg.errcode()); return; } } diff --git a/src/url-copy/LegacyReporter.cpp b/src/url-copy/LegacyReporter.cpp index 2417a6af3..79114e5c6 100644 --- a/src/url-copy/LegacyReporter.cpp +++ b/src/url-copy/LegacyReporter.cpp @@ -152,6 +152,8 @@ void LegacyReporter::sendTransferCompleted(const Transfer &transfer, Gfal2Transf if ((transfer.error->code() == EEXIST) && (opts.dstFileReport) && (!opts.overwrite)) { status.set_file_metadata(replaceMetadataString(transfer.fileMetadata)); } + + status.set_log_path(transfer.logFile); } status.set_transfer_message(fullErrMsg.str()); status.set_retry(transfer.error->isRecoverable()); -- GitLab From 137c53f0bdbbdd5bbc4619f87d37604e654f45dd Mon Sep 17 00:00:00 2001 From: Mihai Patrascoiu <mihai.patrascoiu@cern.ch> Date: Fri, 21 Jul 2023 17:46:56 +0200 Subject: [PATCH 3/3] Format the SQL statements in "setRetryTransfer" function --- src/db/mysql/MySqlAPI.cpp | 82 ++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/src/db/mysql/MySqlAPI.cpp b/src/db/mysql/MySqlAPI.cpp index 3966a0cfb..b62d79337 100644 --- a/src/db/mysql/MySqlAPI.cpp +++ b/src/db/mysql/MySqlAPI.cpp @@ -2702,35 +2702,30 @@ void MySqlAPI::setRetryTransfer(const std::string& jobId, uint64_t fileId, int r { soci::session sql(*connectionPool); - //expressed in secs, default delay + // Expressed in secs, default delay const int default_retry_delay = DEFAULT_RETRY_DELAY; int retry_delay = 0; std::string job_type; - soci::indicator ind = soci::i_ok; + auto ind = soci::i_ok; try { - sql << - " select RETRY_DELAY, job_type from t_job where job_id=:jobId ", - soci::use(jobId), - soci::into(retry_delay), - soci::into(job_type, ind) - ; + sql << "SELECT retry_delay, job_type FROM t_job WHERE job_id = :jobId ", + soci::use(jobId), + soci::into(retry_delay), + soci::into(job_type, ind); sql.begin(); - if ( (ind == soci::i_ok) && job_type == "Y") + if ((ind == soci::i_ok) && (job_type == "Y")) { - sql << "UPDATE t_job SET " - " job_state = 'ACTIVE' " - "WHERE job_id = :jobId AND " - " job_state NOT IN ('FINISHEDDIRTY','FAILED','CANCELED','FINISHED') AND " - " job_type = 'Y'", - soci::use(jobId); + " job_state = 'ACTIVE' " + "WHERE job_id = :jobId AND job_type = 'Y' AND " + " job_state NOT IN ('FINISHEDDIRTY', 'FAILED', 'CANCELED', 'FINISHED')", + soci::use(jobId); } - struct tm tTime; if (retry_delay > 0) { @@ -2748,38 +2743,45 @@ void MySqlAPI::setRetryTransfer(const std::string& jobId, uint64_t fileId, int r int bring_online = -1; int copy_pin_lifetime = -1; - // query for the file state in DB - sql << "SELECT bring_online, copy_pin_lifetime FROM t_job WHERE job_id=:jobId", - soci::use(jobId), - soci::into(bring_online), - soci::into(copy_pin_lifetime); + // Query for the file state in DB + sql << "SELECT bring_online, copy_pin_lifetime FROM t_job WHERE job_id = :jobId", + soci::use(jobId), + soci::into(bring_online), + soci::into(copy_pin_lifetime); - //staging exception, if file failed with timeout and was staged before, reset it - if( (bring_online > 0 || copy_pin_lifetime > 0) && errcode == ETIMEDOUT) + // Staging exception: if file failed with timeout and was staged before, reset it + if ((bring_online > 0 || copy_pin_lifetime > 0) && (errcode == ETIMEDOUT)) { - sql << "update t_file set retry = :retry, current_failures = 0, file_state='STAGING', " - "internal_file_params=NULL, transfer_host=NULL,start_time=NULL, pid=NULL, " - " filesize=0, staging_start=NULL, staging_finished=NULL where file_id=:file_id and job_id=:job_id AND file_state NOT IN ('FINISHED','STAGING','SUBMITTED','FAILED','CANCELED') ", - soci::use(retryNo), - soci::use(fileId), - soci::use(jobId); + sql << "UPDATE t_file SET " + " retry = :retryNo, current_failures = 0, file_state = 'STAGING', " + " filesize = 0, internal_file_params = NULL, transfer_host = NULL, pid = NULL, " + " start_time = NULL, staging_start = NULL, staging_finished = NULL " + "WHERE file_id = :fileId AND job_id = :jobId AND " + " file_state NOT IN ('FINISHED', 'STAGING', 'SUBMITTED', 'FAILED', 'CANCELED')", + soci::use(retryNo), + soci::use(fileId), + soci::use(jobId); } else { - sql << "UPDATE t_file SET retry_timestamp=:1, retry = :retry, file_state = 'SUBMITTED', start_time=NULL, " - "transfer_host=NULL, log_file=NULL," - " log_file_debug=NULL, throughput = 0, current_failures = 1 " - " WHERE file_id = :fileId AND job_id = :jobId AND file_state NOT IN ('FINISHED','SUBMITTED','FAILED','CANCELED')", - soci::use(tTime), soci::use(retryNo), soci::use(fileId), soci::use(jobId); - + sql << "UPDATE t_file SET " + " retry = :retryNo, retry_timestamp = :tTime, file_state = 'SUBMITTED', " + " throughput = 0, current_failures = 1, start_time = NULL, " + " transfer_host = NULL, log_file = NULL, log_file_debug = NULL " + " WHERE file_id = :fileId AND job_id = :jobId AND " + " file_state NOT IN ('FINISHED', 'SUBMITTED', 'FAILED', 'CANCELED')", + soci::use(retryNo), + soci::use(tTime), + soci::use(fileId), + soci::use(jobId); } - // Keep log + // Keep transfer retry log sql << "INSERT IGNORE INTO t_file_retry_errors " - " (file_id, attempt, datetime, reason, transfer_host, log_file) " - "VALUES (:fileId, :retryNo, UTC_TIMESTAMP(), :reason, :hostname, :logFile)", - soci::use(fileId), soci::use(retryNo), - soci::use(reason), soci::use(hostname), soci::use(logFile); + " (file_id, attempt, datetime, reason, transfer_host, log_file) " + "VALUES (:fileId, :retryNo, UTC_TIMESTAMP(), :reason, :hostname, :logFile)", + soci::use(fileId), soci::use(retryNo), + soci::use(reason), soci::use(hostname), soci::use(logFile); sql.commit(); } -- GitLab