Fix archival job retrieval and implement the minimum set of methods to enable a file to be written to tape
When an archival job is queued to the PostgresSchedDB, the tape server fails to retrieve that row:
Mar 18 15:27:38.238281 tpsrv01 cta-taped: LVL="INFO" PID="409" TID="409" MSG="In Scheduler::ping(): success." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="drive:VDSTK11" catalogueTime="0.004998" schedulerDbTime="0.004475" checkEnvironmentVariablesTime="0.000057"
Mar 18 15:27:38.238470 tpsrv01 cta-taped: LVL="INFO" PID="409" TID="409" MSG="Set process capabilities for using tape" drive_name="VDSTK11" instance="CI" sched_backend="VFS" capabilities="= cap_sys_rawio+ep"
Mar 18 15:27:44.126207 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In MaintenanceHandler::exceptionThrowingRunChild(): Waiting for a message ended." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.126279 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In MaintenanceHandler::exceptionThrowingRunChild(): About to do a maintenance pass." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.126298 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In DiskReportRunner::runOnePass(): getting next archive jobs to report from Scheduler DB" drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.126626 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): Before getting archive row." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.126783 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): After getting archive row." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.126803 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): Before Next Result is fetched." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.127020 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): After Next Result is fetched." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.129388 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): Exception thrown: /lib64/libctacommon.so.0(cta::exception::Backtrace::Backtrace(bool)+0x6b) [0x7f6b8e9b1333]
/lib64/libctacommon.so.0(cta::exception::Exception::Exception(std::basic_string_view<char, std::char_traits<char> >, bool)+0x91) [0x7f6b8e9b234b]
/lib64/libctardbms.so.0(cta::rdbms::NullDbValue::NullDbValue(std::string const&, bool)+0x53) [0x7f6b906287b3]
/lib64/libctardbms.so.0(cta::rdbms::Rset::columnUint64(std::string const&) const+0x17e) [0x7f6b9062a04c]
/lib64/libctascheduler.so.0(cta::postgresscheddb::sql::ArchiveJobQueueRow::operator=(cta::rdbms::Rset const&)+0xb1) [0x7f6b8fe3f447]
/lib64/libctascheduler.so.0(cta::postgresscheddb::sql::ArchiveJobQueueRow::ArchiveJobQueueRow(cta::rdbms::Rset const&)+0x1cb) [0x7f6b8fe3f299]
/lib64/libctascheduler.so.0(void __gnu_cxx::new_allocator<std::_List_node<cta::postgresscheddb::sql::ArchiveJobQueueRow> >::construct<cta::postgresscheddb::sql::ArchiveJobQueueRow, cta::rdbms::Rset&>(cta::postgresscheddb::sql::ArchiveJobQueueRow*, cta::rdbms::Rset&)+0x56) [0x7f6b8fe4df96]
/lib64/libctascheduler.so.0(void std::allocator_traits<std::allocator<std::_List_node<cta::postgresscheddb::sql::ArchiveJobQueueRow> > >::construct<cta::postgresscheddb::sql::ArchiveJobQueueRow, cta::rdbms::Rset&>(std::allocator<std::_List_node<cta::postgresscheddb::sql::ArchiveJobQueueRow> >&, cta::postgresscheddb::sql::ArchiveJobQueueRow*, cta::rdbms::Rset&)+0x45) [0x7f6b8fe4bd8b]
/lib64/libctascheduler.so.0(std::_List_node<cta::postgresscheddb::sql::ArchiveJobQueueRow>* std::list<cta::postgresscheddb::sql::ArchiveJobQueueRow, std::allocator<cta::postgresscheddb::sql::ArchiveJobQueueRow> >::_M_create_node<cta::rdbms::Rset&>(cta::rdbms::Rset&)+0x87) [0x7f6b8fe48aa9]
/lib64/libctascheduler.so.0(void std::list<cta::postgresscheddb::sql::ArchiveJobQueueRow, std::allocator<cta::postgresscheddb::sql::ArchiveJobQueueRow> >::_M_insert<cta::rdbms::Rset&>(std::_List_iterator<cta::postgresscheddb::sql::ArchiveJobQueueRow>, cta::rdbms::Rset&)+0x41) [0x7f6b8fe4591f]
/lib64/libctascheduler.so.0(cta::postgresscheddb::sql::ArchiveJobQueueRow& std::list<cta::postgresscheddb::sql::ArchiveJobQueueRow, std::allocator<cta::postgresscheddb::sql::ArchiveJobQueueRow> >::emplace_back<cta::rdbms::Rset&>(cta::rdbms::Rset&)+0x50) [0x7f6b8fe42bee]
/lib64/libctascheduler.so.0(cta::PostgresSchedDB::getNextArchiveJobsToReportBatch(unsigned long, cta::log::LogContext&)+0x1cb) [0x7f6b8fe3a165]
/lib64/libctascheduler.so.0(cta::Scheduler::getNextArchiveJobsToReportBatch(unsigned long, cta::log::LogContext&)+0x72) [0x7f6b8fdb7f02]
/lib64/libctascheduler.so.0(cta::DiskReportRunner::runOnePass(cta::log::LogContext&)+0xf5) [0x7f6b8fd8ce7d]
/usr/bin/cta-taped() [0x48be36]
/usr/bin/cta-taped() [0x48b84a]
/usr/bin/cta-taped() [0x491472]
/usr/bin/cta-taped() [0x490628]
/usr/bin/cta-taped() [0x45cccb]
/usr/bin/cta-taped() [0x45c9a2]
/usr/bin/cta-taped() [0x45c3bd]
/usr/bin/cta-taped() [0x454d05]
/usr/bin/cta-taped() [0x4553da]
/lib64/libc.so.6(__libc_start_main+0xf5) [0x7f6b88561555]
/usr/bin/cta-taped() [0x454839]
" drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.129450 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): After emplace_back." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.129476 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): Before Archive Jobs filled." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.129491 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In PostgresSchedDB::getNextArchiveJobsToReportBatch(): After Archive Jobs filled, before return." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.129689 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In DiskReportRunner::runOnePass(): archiveJobsToReport no size." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
Mar 18 15:27:44.129704 tpsrv01 cta-taped: LVL="DEBUG" PID="322" TID="322" MSG="In DiskReportRunner::runOnePass(): archiveJobsToReport is empty." drive_name="VDSTK11" instance="CI" sched_backend="VFS" SubprocessName="maintenanceHandler"
despite the row being present in the DB:
[cta@jaro-dev-cc7 tests (620-taped-server-dies-when-configuring-pgsched-as-a-backend)]$ kubectl exec --namespace nsdev postgres-sched -ti /bin/bash
root@postgres-sched:/# psql -U cta postgres-sched
psql (9.6.24)
Type "help" for help.
postgres-sched=# \dt
List of relations
Schema | Name | Type | Owner
--------+--------------------+-------+-------
public | archive_job_queue | table | cta
public | cta_scheduler | table | cta
public | repack_job_queue | table | cta
public | retrieve_job_queue | table | cta
public | tape_mounts | table | cta
(5 rows)
postgres-sched=# select * from archive_job_queue;
job_id | archive_reqid | status | creation_time | mount_policy | vid | mount_id | start_time | priority | storage_class | min_archive_request_age | copy_nb | size_in_bytes | archive_file_id |
checksumblob | requester_name | requester_group | src_url | disk_instance | disk_file_path | dis
k_file_id | disk_file_gid | disk_file_owner_uid | repack_reqid | is_repack | archive_error_report_url
| archive_rep
ort_url | failure_report_log | failure_log | repack_dest_vid | is_reportdecided | total_retries | max_total_retri
es | retries_within_mount | max_retries_within_mount | last_mount_with_failure | total_report_retries | max_report_retries | tape_pool | repack_filebuf_url | repack_fseq
--------+---------------+-----------------------+---------------+--------------+-----+----------+------------+----------+-----------------+-------------------------+---------+---------------+-----------------+---
---------------------+----------------+-----------------+---------------------------------------------------------------------------------------------+---------------+----------------------------------------+----
----------+---------------+---------------------+--------------+-----------+----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------+--------------------+-------------+-----------------+------------------+---------------+----------------
---+----------------------+--------------------------+-------------------------+----------------------+--------------------+------------+--------------------+-------------
1 | 1 | AJS_ToTransferForUser | 1710772056 | ctasystest | | | 1710772056 | 1 | ctaStorageClass | 1 | 1 | 6 | 4294967296 | \x
0a08080112041702de07 | user1 | eosusers | root://ctaeos.nsdev.svc.cluster.local//eos/ctaeos/preprod/test_http-rest-api?eos.lfn=fxid:e | ctaeos | /eos/ctaeos/preprod/test_http-rest-api | 14
| 1100 | 11001 | | | eosQuery://ctaeos.nsdev.svc.cluster.local//eos/wfe/passwd?mgm.pcmd=event&mgm.fid=e&mgm.logid=cta&mgm.event=sync::archive_failed&mgm.wor
kflow=default&mgm.path=/dummy_path&mgm.ruid=0&mgm.rgid=0&cta_archive_file_id=4294967296&mgm.errmsg= | eosQuery://ctaeos.nsdev.svc.cluster.local//eos/wfe/passwd?mgm.pcmd=event&mgm.fid=e&mgm.logid=cta&mgm.event=syn
c::archived&mgm.workflow=default&mgm.path=/dummy_path&mgm.ruid=0&mgm.rgid=0&cta_archive_file_id=4294967296 | | | | | 0 |
2 | 0 | | 0 | | | ctasystest | |
(1 row)