From 526f7380050f64d4ada628a1bc3c26f03b9b4ae5 Mon Sep 17 00:00:00 2001 From: Fabrice Le Goff <fabrice.le.goff@cern.ch> Date: Wed, 4 May 2022 10:25:00 +0200 Subject: [PATCH] some changes in log messages, change in FileNotFoundError handling --- Script/CastorScript.py | 4 ++ Script/cs/Threads/CopyThread.py | 51 +++++++------------------- Script/cs/Threads/DeleteThread.py | 29 ++++----------- Script/cs/Threads/InfoServiceThread.py | 13 +------ Script/cs/Threads/ManagerThread.py | 44 +++++++++------------- 5 files changed, 43 insertions(+), 98 deletions(-) diff --git a/Script/CastorScript.py b/Script/CastorScript.py index c27cc70..a2feb13 100644 --- a/Script/CastorScript.py +++ b/Script/CastorScript.py @@ -19,6 +19,7 @@ import cs.Tools.Constants as Constants import cs.Tools.LogConfig as LogConfig import cs.Tools.krb as krb import cs.Tools.Libraries.Database as Database +from cs.Tools.utils import thread_id_string def main_exit(signal_number, stackframe): del signal_number, stackframe @@ -167,6 +168,9 @@ if config.CopyEnabled: signal.signal(signal.SIGUSR2, main_exit) signal.signal(signal.SIGINT, main_exit) +# initial setup is done, now starts the real work of the main thread +logger.info(thread_id_string()) + # Connect to database db = None db = checkDB(db, logger, dblogger, filename_parser, config) diff --git a/Script/cs/Threads/CopyThread.py b/Script/cs/Threads/CopyThread.py index b77a3cd..b919d32 100644 --- a/Script/cs/Threads/CopyThread.py +++ b/Script/cs/Threads/CopyThread.py @@ -5,7 +5,6 @@ from time import time import signal import queue import uuid -import errno import cs.Tools.Constants as Constants from cs.Tools.utils import thread_id_string, adler32, get_bw, dir_is_locked from cs.Tools.LogConfig import enable_file_logging @@ -120,21 +119,13 @@ class CopyThread(threading.Thread): try: local_filesize = os.stat(fmd.file_name).st_size - except OSError as exc: - if exc.errno == errno.ENOENT: - # The file has been removed by an external cause. - # This is bad but this is not a reason to crash. - self.logger.warning('error getting local file size: ' - 'no such file, %s', fmd.file_name) - # delete associated file (ignoring errors) - try: - os.remove(fmd.file_name + Constants.tobecopied_ext) - except: - pass - self.clearqueue.put(fmd) - continue - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) + # delete associated file (ignoring errors) + try: os.remove(fmd.file_name + Constants.tobecopied_ext) + except: pass + self.clearqueue.put(fmd) + continue if self.db: with self.dblock: @@ -174,14 +165,8 @@ class CopyThread(threading.Thread): try: os.rename(fmd.file_name + Constants.tobecopied_ext, fmd.file_name + Constants.copying_ext) - except OSError as exc: - if exc.errno == errno.ENOENT: - # The file has been removed by an external cause. - # This is bad but this is not a reason to crash. - self.logger.warning('error renaming %s: no such file', - fmd.file_name + Constants.tobecopied_ext) - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) self.transfers.append(TransferInfo(copyproc, fmd, 0, time(), local_filesize, checksum, dbfilesize)) @@ -291,12 +276,8 @@ class CopyThread(threading.Thread): try: os.rename(filename + Constants.copying_ext, filename + Constants.copied_ext) - except OSError as exc: - if exc.errno == errno.ENOENT: - self.logger.warning('error renaming %s: no such file', - filename + Constants.copying_ext) - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) if self.conf.DdmMonitoringEnabled: self.publishToDdmMonitoring( @@ -362,14 +343,8 @@ class CopyThread(threading.Thread): os.rename(filename + Constants.copying_ext, filename + Constants.problematic_ext) self.problematic_transfer_failures += 1 - except OSError as exc: - if exc.errno == errno.ENOENT: - # The file has been removed by an external cause. - # This is bad but this is not a reason to crash. - self.logger.warning('error renaming %s: no such file', - filename + Constants.copying_ext) - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) if self.conf.DdmMonitoringEnabled: self.publishToDdmMonitoring( diff --git a/Script/cs/Threads/DeleteThread.py b/Script/cs/Threads/DeleteThread.py index fd99cef..67f7206 100644 --- a/Script/cs/Threads/DeleteThread.py +++ b/Script/cs/Threads/DeleteThread.py @@ -1,7 +1,6 @@ #!/bin/env python import threading import os -import errno import importlib from time import time import re @@ -141,13 +140,9 @@ class DeleteThread(threading.Thread): if filename not in self.migration_waitroom: try: transfer_completed_time = os.path.getctime(filename + Constants.copied_ext) - except OSError as exc: - if exc.errno == errno.ENOENT: - self.logger.warning('error accessing ctime for %s: no such file, using now for transfer time', - filename + Constants.copied_ext) - transfer_completed_time = time() - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) + transfer_completed_time = time() self.migration_waitroom[filename] = transfer_completed_time else: transfer_completed_time = self.migration_waitroom.get(filename) @@ -176,23 +171,13 @@ class DeleteThread(threading.Thread): try: os.remove(fmd.file_name) - except OSError as exc: - if exc.errno == errno.ENOENT: - # The file has been removed by an external cause. - # This is bad but this is not a reason to crash. - self.logger.warning('error deleting %s: no such file', fmd.file_name) - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) try: os.remove(fmd.file_name + Constants.copied_ext) - except OSError as exc: - if exc.errno == errno.ENOENT: - # The .COPIED file has been removed by an external cause. - # This is bad but this is not a reason to crash. - self.logger.warning('error deleting %s: no such file', fmd.file_name + Constants.copied_ext) - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) if self.db: self.logger.info('update database: deletion of %s', fmd.file_name) diff --git a/Script/cs/Threads/InfoServiceThread.py b/Script/cs/Threads/InfoServiceThread.py index d08c3e7..993731a 100644 --- a/Script/cs/Threads/InfoServiceThread.py +++ b/Script/cs/Threads/InfoServiceThread.py @@ -102,16 +102,14 @@ class InfoServiceThread(threading.Thread): fs_data_identifier = make_fs_data_identifier(self.is_server, self.tdaq_app_name, mountpoint) try: - self.logger.debug("creating ISObject: %s, type: %s", fs_data_identifier, fs_data_type) is_data = ispy.ISObject(self.ipc_partition, fs_data_identifier, fs_data_type) is_data.total_MB = (fs.f_blocks * fs.f_bsize) // 1000000 is_data.available_MB = (fs.f_bavail * fs.f_bsize) // 1000000 is_data.occupancy_percent = 100.*(1-float(fs.f_bavail)/float(fs.f_blocks)) - self.logger.debug("checkin of is_data") is_data.checkin() - self.logger.debug("Update sent: %s", str(is_data)) + self.logger.debug("FileSystemInfo sent: %s", str(is_data)) except Exception as ex: self.logger.error("Error occured in update_fs_info(): %s", str(ex)) @@ -122,7 +120,6 @@ class InfoServiceThread(threading.Thread): is_data_identifier = make_is_data_identifier(self.is_server, self.tdaq_app_name) try: - self.logger.debug("creating ISObject: %s, type: %s", is_data_identifier,is_data_type) is_data = ispy.ISObject(self.ipc_partition, is_data_identifier, is_data_type) is_data.uptime_seconds = self.get_process_uptime_in_seconds() @@ -145,15 +142,9 @@ class InfoServiceThread(threading.Thread): if self._delete_thread: is_data.files_deleted = self._delete_thread.files_deleted - self.logger.debug("getting the problematic dictionary lock from manager_thread") is_data.files_currently_problematic = self._manager_thread.getNbCurrentlyProblematic() - - self.logger.debug("Sending state: Uptime: %s, problematic_errors: %s, simple_errors: %s", - is_data.uptime_seconds, is_data.problematic_transfer_failures, is_data.simple_transfer_failures) - - self.logger.debug("checkin of is_data") is_data.checkin() - self.logger.debug("Update sent: %s", str(is_data)) + self.logger.debug("CastorScriptState sent: %s", str(is_data)) except Exception as ex: self.logger.error("Error occured in update_cs_info(): %s", str(ex)) diff --git a/Script/cs/Threads/ManagerThread.py b/Script/cs/Threads/ManagerThread.py index f5b0d09..c13eac1 100644 --- a/Script/cs/Threads/ManagerThread.py +++ b/Script/cs/Threads/ManagerThread.py @@ -5,7 +5,6 @@ import datetime import pprint import math import cs.Tools.Constants as Constants -import errno import re import queue from cs.Tools.utils import thread_id_string @@ -126,12 +125,16 @@ class ManagerThread(threading.Thread): self.logger.debug('removed from managed files: %s', fmd.file_name) clear_counter += 1 - self.logger.debug('removed entries from managed files: %d', clear_counter) + if clear_counter != 0: + self.logger.info('removed entries from managed files: %d', clear_counter) def searchFilesToTransfer(self): self.logger.debug('searching for files to transfer') - for fmd in self.getOrderedFilesToTransfer(): + newfiles = self.getOrderedFilesToTransfer() + if len(newfiles) != 0: + self.logger.info('found %d new files to handle', len(newfiles)) + for fmd in newfiles: self.managedfiles.append(fmd.file_name) if self.conf.CopyEnabled: self.logger.debug('adding to copyqueue: %s', fmd.file_name) @@ -192,12 +195,9 @@ class ManagerThread(threading.Thread): if self.conf.MinSizekB: try: filesize = os.path.getsize(filename) - except OSError as exc: - if exc.errno == errno.ENOENT: - self.logger.warning('error getting file size for %s: no such file', filename) - continue - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) + continue if (filesize / 1024.) < self.conf.MinSizekB: self.logger.debug('skipping too small file: %s', filename) @@ -214,12 +214,10 @@ class ManagerThread(threading.Thread): time_to_retry = False try: time_to_retry = self.checkProbl(filename) - except OSError as exc: - if exc.errno == errno.ENOENT: - # file was deleted, nothing we can do - ManagerThread.cleanHelperFiles(filename) - else: - raise exc + except FileNotFoundError as exc: + # file was deleted, nothing we can do + self.logger.warning(exc) + ManagerThread.cleanHelperFiles(filename) if not time_to_retry: continue @@ -290,12 +288,7 @@ class ManagerThread(threading.Thread): # Inform the user if we cannot copy the file and the retry time # limit has been reached if retry_timeout > self.conf.ProblDelayLimit: - try: - filesize = os.path.getsize(filename) - except OSError as exc: - if exc.errno == errno.ENOENT: - self.logger.warning('error getting filesize: no such file, %s', filename) - raise exc + filesize = os.path.getsize(filename) self.logger.critical('problematic file %s reached the delay retry limit of %d seconds; ' 'filesize= %d, transfer_timeout= %d', filename, self.conf.ProblDelayLimit, filesize, self.conf.TransferTimeout) @@ -311,12 +304,9 @@ class ManagerThread(threading.Thread): try: mtime = FileMetaData.getMTime(filename) - except OSError as exc: - if exc.errno == errno.ENOENT: - self.logger.warning('error getting mtime: no such file, %s', filename) - return None - else: - raise exc + except FileNotFoundError as exc: + self.logger.warning(exc) + return None infofile = '' if os.path.isfile(filename + Constants.tobecopied_ext): infofile = filename + Constants.tobecopied_ext -- GitLab