From 09c7d39256c771fd1ae67cd264a2ac115e4209b3 Mon Sep 17 00:00:00 2001
From: Mihai Patrascoiu <mihai.patrascoiu@cern.ch>
Date: Fri, 21 Jul 2023 17:12:00 +0200
Subject: [PATCH 1/3] FTS-1920, FTS-1940: Schema for transfer retry log file

---
 src/db/schema/mysql/fts-diff-8.1.0.sql | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/db/schema/mysql/fts-diff-8.1.0.sql b/src/db/schema/mysql/fts-diff-8.1.0.sql
index 3b45687c6..1ed4d9b34 100644
--- a/src/db/schema/mysql/fts-diff-8.1.0.sql
+++ b/src/db/schema/mysql/fts-diff-8.1.0.sql
@@ -1,10 +1,15 @@
 --
 -- FTS3 Schema 8.1.0
--- [FTS-1914] Schema changes for TPC-support configuration per SE
+-- [FTS-1914] TPC-support configuration per SE
+-- [FTS-1920] Retry log file
 --
 
 ALTER TABLE `t_se`
     ADD COLUMN `tpc_support` VARCHAR(10) DEFAULT NULL;
 
+ALTER TABLE `t_file_retry_errors`
+    ADD COLUMN `transfer_host` varchar(255) DEFAULT NULL,
+    ADD COLUMN `log_file` varchar(2048) DEFAULT NULL;
+
 INSERT INTO t_schema_vers (major, minor, patch, message)
-VALUES (8, 1, 0, 'FTS-1914: Schema changes for TCP-support configuration per SE');
+VALUES (8, 1, 0, 'FTS-1914: TPC-support configuration per SE');
-- 
GitLab


From f3fb23f66fe445ed76612c190802eeaaf3db94c2 Mon Sep 17 00:00:00 2001
From: Mihai Patrascoiu <mihai.patrascoiu@cern.ch>
Date: Fri, 21 Jul 2023 17:43:28 +0200
Subject: [PATCH 2/3] FTS-1920: Propagate and store the transfer host and
 logfile path for transfer retries

---
 src/db/generic/GenericDbIfce.h                    |  6 ++++--
 src/db/mysql/MySqlAPI.cpp                         | 15 ++++++++-------
 src/db/mysql/MySqlAPI.h                           |  6 ++++--
 src/msg-bus/events/Message.proto                  |  2 ++
 .../transfers/MessageProcessingService.cpp        |  3 ++-
 src/url-copy/LegacyReporter.cpp                   |  2 ++
 6 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/db/generic/GenericDbIfce.h b/src/db/generic/GenericDbIfce.h
index 053aaf477..ca1a07e6c 100644
--- a/src/db/generic/GenericDbIfce.h
+++ b/src/db/generic/GenericDbIfce.h
@@ -223,10 +223,12 @@ public:
     /// Add a new retry to the transfer identified by fileId
     /// @param jobId    Job identifier
     /// @param fileId   Transfer identifier
+    /// @param retryNo  The retry attempt number
     /// @param reason   String representation of the failure
     /// @param errcode  An integer representing the failure
-    virtual void setRetryTransfer(const std::string & jobId, uint64_t fileId, int retry, const std::string& reason,
-        int errcode) = 0;
+    /// @param logFile  The log file path
+    virtual void setRetryTransfer(const std::string& jobId, uint64_t fileId, int retryNo,
+                                  const std::string& reason, const std::string& logFile, int errcode) = 0;
 
     /// Bulk update of transfer progress
     virtual void updateFileTransferProgressVector(const std::vector<fts3::events::MessageUpdater> &messages) = 0;
diff --git a/src/db/mysql/MySqlAPI.cpp b/src/db/mysql/MySqlAPI.cpp
index 339f711d3..3966a0cfb 100644
--- a/src/db/mysql/MySqlAPI.cpp
+++ b/src/db/mysql/MySqlAPI.cpp
@@ -2697,8 +2697,8 @@ std::vector<TransferState> MySqlAPI::getStateOfTransfer(const std::string& jobId
 }
 
 
-void MySqlAPI::setRetryTransfer(const std::string &jobId, uint64_t fileId, int retry,
-    const std::string &reason, int errcode)
+void MySqlAPI::setRetryTransfer(const std::string& jobId, uint64_t fileId, int retryNo,
+                                const std::string& reason, const std::string& logFile, int errcode)
 {
     soci::session sql(*connectionPool);
 
@@ -2760,7 +2760,7 @@ void MySqlAPI::setRetryTransfer(const std::string &jobId, uint64_t fileId, int r
             sql << "update t_file set retry = :retry, current_failures = 0, file_state='STAGING', "
                 "internal_file_params=NULL, transfer_host=NULL,start_time=NULL, pid=NULL, "
                 " filesize=0, staging_start=NULL, staging_finished=NULL where file_id=:file_id and job_id=:job_id AND file_state NOT IN ('FINISHED','STAGING','SUBMITTED','FAILED','CANCELED') ",
-                soci::use(retry),
+                soci::use(retryNo),
                 soci::use(fileId),
                 soci::use(jobId);
         }
@@ -2770,15 +2770,16 @@ void MySqlAPI::setRetryTransfer(const std::string &jobId, uint64_t fileId, int r
                 "transfer_host=NULL, log_file=NULL,"
                 " log_file_debug=NULL, throughput = 0, current_failures = 1 "
                 " WHERE  file_id = :fileId AND  job_id = :jobId AND file_state NOT IN ('FINISHED','SUBMITTED','FAILED','CANCELED')",
-                soci::use(tTime), soci::use(retry), soci::use(fileId), soci::use(jobId);
+                soci::use(tTime), soci::use(retryNo), soci::use(fileId), soci::use(jobId);
 
         }
 
         // Keep log
         sql << "INSERT IGNORE INTO t_file_retry_errors "
-            "    (file_id, attempt, datetime, reason) "
-            "VALUES (:fileId, :attempt, UTC_TIMESTAMP(), :reason)",
-            soci::use(fileId), soci::use(retry), soci::use(reason);
+            "    (file_id, attempt, datetime, reason, transfer_host, log_file) "
+            "VALUES (:fileId, :retryNo, UTC_TIMESTAMP(), :reason, :hostname, :logFile)",
+            soci::use(fileId), soci::use(retryNo),
+            soci::use(reason), soci::use(hostname), soci::use(logFile);
 
         sql.commit();
     }
diff --git a/src/db/mysql/MySqlAPI.h b/src/db/mysql/MySqlAPI.h
index 37111a99a..c6808c77e 100644
--- a/src/db/mysql/MySqlAPI.h
+++ b/src/db/mysql/MySqlAPI.h
@@ -181,10 +181,12 @@ public:
     /// Add a new retry to the transfer identified by fileId
     /// @param jobId    Job identifier
     /// @param fileId   Transfer identifier
+    /// @param retryNo  The retry attempt number
     /// @param reason   String representation of the failure
     /// @param errcode  An integer representing the failure
-    virtual void setRetryTransfer(const std::string & jobId, uint64_t fileId, int retry, const std::string& reason,
-        int errcode);
+    /// @param logFile  The log file path
+    virtual void setRetryTransfer(const std::string& jobId, uint64_t fileId, int retryNo,
+                                  const std::string& reason, const std::string& logFile, int errcode);
 
     /// Bulk update of transfer progress
     virtual void updateFileTransferProgressVector(const std::vector<fts3::events::MessageUpdater> &messages);
diff --git a/src/msg-bus/events/Message.proto b/src/msg-bus/events/Message.proto
index 24c3d87ed..b3e0e2509 100644
--- a/src/msg-bus/events/Message.proto
+++ b/src/msg-bus/events/Message.proto
@@ -27,4 +27,6 @@ message Message {
     optional double instantaneous_throughput = 18;
     optional uint64 gfal_perf_timestamp = 19;
     optional uint64 transferred_since_last_ping = 20;
+
+    optional string log_path = 21;
 }
diff --git a/src/server/services/transfers/MessageProcessingService.cpp b/src/server/services/transfers/MessageProcessingService.cpp
index 389260f2f..50922f44b 100644
--- a/src/server/services/transfers/MessageProcessingService.cpp
+++ b/src/server/services/transfers/MessageProcessingService.cpp
@@ -247,7 +247,8 @@ void MessageProcessingService::performOtherMessageDbChange(const fts3::events::M
                     if (retryTimes <= retry - 1)
                     {
                         db::DBSingleton::instance().getDBObjectInstance()->setRetryTransfer(
-                            msg.job_id(), msg.file_id(), retryTimes+1, msg.transfer_message(), msg.errcode());
+                            msg.job_id(), msg.file_id(), retryTimes + 1,
+                            msg.transfer_message(), msg.log_path(), msg.errcode());
                         return;
                     }
                 }
diff --git a/src/url-copy/LegacyReporter.cpp b/src/url-copy/LegacyReporter.cpp
index 2417a6af3..79114e5c6 100644
--- a/src/url-copy/LegacyReporter.cpp
+++ b/src/url-copy/LegacyReporter.cpp
@@ -152,6 +152,8 @@ void LegacyReporter::sendTransferCompleted(const Transfer &transfer, Gfal2Transf
             if ((transfer.error->code() == EEXIST) && (opts.dstFileReport) && (!opts.overwrite)) {
                 status.set_file_metadata(replaceMetadataString(transfer.fileMetadata));
             }
+
+            status.set_log_path(transfer.logFile);
         }
         status.set_transfer_message(fullErrMsg.str());
         status.set_retry(transfer.error->isRecoverable());
-- 
GitLab


From 137c53f0bdbbdd5bbc4619f87d37604e654f45dd Mon Sep 17 00:00:00 2001
From: Mihai Patrascoiu <mihai.patrascoiu@cern.ch>
Date: Fri, 21 Jul 2023 17:46:56 +0200
Subject: [PATCH 3/3] Format the SQL statements in "setRetryTransfer" function

---
 src/db/mysql/MySqlAPI.cpp | 82 ++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 40 deletions(-)

diff --git a/src/db/mysql/MySqlAPI.cpp b/src/db/mysql/MySqlAPI.cpp
index 3966a0cfb..b62d79337 100644
--- a/src/db/mysql/MySqlAPI.cpp
+++ b/src/db/mysql/MySqlAPI.cpp
@@ -2702,35 +2702,30 @@ void MySqlAPI::setRetryTransfer(const std::string& jobId, uint64_t fileId, int r
 {
     soci::session sql(*connectionPool);
 
-    //expressed in secs, default delay
+    // Default retry delay, expressed in seconds
     const int default_retry_delay = DEFAULT_RETRY_DELAY;
     int retry_delay = 0;
     std::string job_type;
-    soci::indicator ind = soci::i_ok;
+    auto ind = soci::i_ok;
 
     try
     {
-        sql <<
-            " select RETRY_DELAY, job_type  from t_job where job_id=:jobId ",
-            soci::use(jobId),
-            soci::into(retry_delay),
-            soci::into(job_type, ind)
-            ;
+        sql << "SELECT retry_delay, job_type FROM t_job WHERE job_id = :jobId ",
+                soci::use(jobId),
+                soci::into(retry_delay),
+                soci::into(job_type, ind);
 
         sql.begin();
 
-        if ( (ind == soci::i_ok) && job_type == "Y")
+        if ((ind == soci::i_ok) && (job_type == "Y"))
         {
-
             sql << "UPDATE t_job SET "
-                "    job_state = 'ACTIVE' "
-                "WHERE job_id = :jobId AND "
-                "      job_state NOT IN ('FINISHEDDIRTY','FAILED','CANCELED','FINISHED') AND "
-                "      job_type = 'Y'",
-                soci::use(jobId);
+                   "    job_state = 'ACTIVE' "
+                   "WHERE job_id = :jobId AND job_type = 'Y' AND "
+                   "      job_state NOT IN ('FINISHEDDIRTY', 'FAILED', 'CANCELED', 'FINISHED')",
+                    soci::use(jobId);
         }
 
-
         struct tm tTime;
         if (retry_delay > 0)
         {
@@ -2748,38 +2743,45 @@ void MySqlAPI::setRetryTransfer(const std::string& jobId, uint64_t fileId, int r
         int bring_online = -1;
         int copy_pin_lifetime = -1;
 
-        // query for the file state in DB
-        sql << "SELECT bring_online, copy_pin_lifetime FROM t_job WHERE job_id=:jobId",
-            soci::use(jobId),
-            soci::into(bring_online),
-            soci::into(copy_pin_lifetime);
+        // Query the job's staging settings (bring_online, copy_pin_lifetime)
+        sql << "SELECT bring_online, copy_pin_lifetime FROM t_job WHERE job_id = :jobId",
+                soci::use(jobId),
+                soci::into(bring_online),
+                soci::into(copy_pin_lifetime);
 
-        //staging exception, if file failed with timeout and was staged before, reset it
-        if( (bring_online > 0 || copy_pin_lifetime > 0) && errcode == ETIMEDOUT)
+        // Staging exception: if file failed with timeout and was staged before, reset it
+        if ((bring_online > 0 || copy_pin_lifetime > 0) && (errcode == ETIMEDOUT))
         {
-            sql << "update t_file set retry = :retry, current_failures = 0, file_state='STAGING', "
-                "internal_file_params=NULL, transfer_host=NULL,start_time=NULL, pid=NULL, "
-                " filesize=0, staging_start=NULL, staging_finished=NULL where file_id=:file_id and job_id=:job_id AND file_state NOT IN ('FINISHED','STAGING','SUBMITTED','FAILED','CANCELED') ",
-                soci::use(retryNo),
-                soci::use(fileId),
-                soci::use(jobId);
+            sql << "UPDATE t_file SET "
+                   "    retry = :retryNo, current_failures = 0, file_state = 'STAGING', "
+                   "    filesize = 0, internal_file_params = NULL, transfer_host = NULL, pid = NULL, "
+                   "    start_time = NULL, staging_start = NULL, staging_finished = NULL "
+                   "WHERE file_id = :fileId AND job_id = :jobId AND "
+                   "      file_state NOT IN ('FINISHED', 'STAGING', 'SUBMITTED', 'FAILED', 'CANCELED')",
+                    soci::use(retryNo),
+                    soci::use(fileId),
+                    soci::use(jobId);
         }
         else
         {
-            sql << "UPDATE t_file SET retry_timestamp=:1, retry = :retry, file_state = 'SUBMITTED', start_time=NULL, "
-                "transfer_host=NULL, log_file=NULL,"
-                " log_file_debug=NULL, throughput = 0, current_failures = 1 "
-                " WHERE  file_id = :fileId AND  job_id = :jobId AND file_state NOT IN ('FINISHED','SUBMITTED','FAILED','CANCELED')",
-                soci::use(tTime), soci::use(retryNo), soci::use(fileId), soci::use(jobId);
-
+            sql << "UPDATE t_file SET "
+                   "    retry = :retryNo, retry_timestamp = :tTime, file_state = 'SUBMITTED', "
+                   "    throughput = 0, current_failures = 1, start_time = NULL, "
+                   "    transfer_host = NULL, log_file = NULL, log_file_debug = NULL "
+                   " WHERE file_id = :fileId AND job_id = :jobId AND "
+                   "       file_state NOT IN ('FINISHED', 'SUBMITTED', 'FAILED', 'CANCELED')",
+                    soci::use(retryNo),
+                    soci::use(tTime),
+                    soci::use(fileId),
+                    soci::use(jobId);
         }
 
-        // Keep log
+        // Keep transfer retry log
         sql << "INSERT IGNORE INTO t_file_retry_errors "
-            "    (file_id, attempt, datetime, reason, transfer_host, log_file) "
-            "VALUES (:fileId, :retryNo, UTC_TIMESTAMP(), :reason, :hostname, :logFile)",
-            soci::use(fileId), soci::use(retryNo),
-            soci::use(reason), soci::use(hostname), soci::use(logFile);
+               "       (file_id, attempt, datetime, reason, transfer_host, log_file) "
+               "VALUES (:fileId, :retryNo, UTC_TIMESTAMP(), :reason, :hostname, :logFile)",
+                soci::use(fileId), soci::use(retryNo),
+                soci::use(reason), soci::use(hostname), soci::use(logFile);
 
         sql.commit();
     }
-- 
GitLab