From 893841afee20baff3b9a7f794db6f1ecefc9e7d6 Mon Sep 17 00:00:00 2001 From: Fabrice Le Goff <fabrice.le.goff@cern.ch> Date: Thu, 28 Apr 2022 19:12:17 +0200 Subject: [PATCH] reorganized ProductionTools for installation of tools --- DeploymentTest/p1.test.atlascdr.bash | 2 +- .../stop.all.cs.instances.at.p1.machine.bash | 23 ------------------- ProductionTools/castor.eostimeout.freq | 3 --- .../{ => installed}/castor.checklogs | 3 +-- .../{ => installed}/castor.greplogs | 2 +- ProductionTools/{ => installed}/castor.logdir | 0 .../{ => installed}/castor.logsfreq | 2 +- .../{ => installed}/castor.notrunning | 0 ProductionTools/{ => installed}/castor.ps | 0 .../{ => installed}/castor.pslocal | 0 .../castor.sfotzdb_intr_writer.py} | 1 + .../castor.sfotzdb_reader.py} | 1 + .../castor.sfotzdb_writer.py} | 1 + ProductionTools/installed/castor.signal | 9 ++++++++ .../{ => installed}/castor.starttime | 0 .../{ => installed}/castor.stopall | 2 +- .../castor.updateconflist} | 0 .../oracle_db_disaster_recovery.py | 3 ++- point1.deployment.sh | 2 ++ 19 files changed, 21 insertions(+), 33 deletions(-) delete mode 100755 DeploymentTest/stop.all.cs.instances.at.p1.machine.bash delete mode 100755 ProductionTools/castor.eostimeout.freq rename ProductionTools/{ => installed}/castor.checklogs (75%) rename ProductionTools/{ => installed}/castor.greplogs (82%) rename ProductionTools/{ => installed}/castor.logdir (100%) rename ProductionTools/{ => installed}/castor.logsfreq (91%) rename ProductionTools/{ => installed}/castor.notrunning (100%) rename ProductionTools/{ => installed}/castor.ps (100%) rename ProductionTools/{ => installed}/castor.pslocal (100%) rename ProductionTools/{sfotzdb_intr_writer.py => installed/castor.sfotzdb_intr_writer.py} (69%) mode change 100644 => 100755 rename ProductionTools/{sfotzdb_reader.py => installed/castor.sfotzdb_reader.py} (70%) mode change 100644 => 100755 rename ProductionTools/{sfotzdb_writer.py => installed/castor.sfotzdb_writer.py} (70%) mode change 100644 => 100755 create mode 100755 ProductionTools/installed/castor.signal rename ProductionTools/{ => installed}/castor.starttime (100%) rename ProductionTools/{ => installed}/castor.stopall (56%) rename ProductionTools/{generate_instance_lists.sh => installed/castor.updateconflist} (100%) diff --git a/DeploymentTest/p1.test.atlascdr.bash b/DeploymentTest/p1.test.atlascdr.bash index 02eee28..5c68661 100755 --- a/DeploymentTest/p1.test.atlascdr.bash +++ b/DeploymentTest/p1.test.atlascdr.bash @@ -107,7 +107,7 @@ while [ "x$LISTING_OUTPUT" != x ]; do done echo "Stopping CastorScript" -~/signal-script.sh 12 +/sw/castor/tools/castor.signal 12 if [[ $COPY_ENABLED == True && $REMOTE_DIR == /eos/* ]]; then echo "Contents of remote directory:" diff --git a/DeploymentTest/stop.all.cs.instances.at.p1.machine.bash b/DeploymentTest/stop.all.cs.instances.at.p1.machine.bash deleted file mode 100755 index 1e04765..0000000 --- a/DeploymentTest/stop.all.cs.instances.at.p1.machine.bash +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -if [ -z $1 ]; then - echo "usage: $0 {test host}" - exit 1 -fi - -TESTHOST=$1 - - -KEYTAB_FILE="/daq_area/castor/$TESTHOST/atlascdr/atlascdr.keytab" -if ! ssh $TESTHOST "sudo -u atlascdr -i cat $KEYTAB_FILE &>/dev/null"; then - echo "error: keytab file not readble by atlascdr ($KEYTAB_FILE)" - exit 3 -else - echo "OK: readable keytab file: $KEYTAB_FILE" -fi - -SOURCE_DIR=`pwd`/.. -TOOLS_DIR=`pwd`/../../flegoff/castor_tools - -echo "sending signal 12 to CastorScripts at $TESTHOST" -ssh $TESTHOST sudo -u atlascdr -i $TOOLS_DIR/signal-script.sh 12 diff --git a/ProductionTools/castor.eostimeout.freq b/ProductionTools/castor.eostimeout.freq deleted file mode 100755 index ebf039f..0000000 --- a/ProductionTools/castor.eostimeout.freq +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -grep "taking to long" /atlas/logs/castor/pc-tdq-sfo-0?/CopyLog.out | awk -F':' '{print $2}' | sort | uniq -c - diff --git a/ProductionTools/castor.checklogs b/ProductionTools/installed/castor.checklogs similarity index 75% rename from ProductionTools/castor.checklogs rename to ProductionTools/installed/castor.checklogs index 336c833..818c70b 100755 --- a/ProductionTools/castor.checklogs +++ b/ProductionTools/installed/castor.checklogs @@ -6,11 +6,10 @@ for c in `cat castor.confs`; do logdir=$(grep LogDir $c | awk '{print $NF}' | tr -d "'") logdir=${logdir%/} # remove trailing slash - lsl=$(ls -l `echo "$logdir/*.out"` 2>/dev/null) + lsl=$(ls -l `echo "$logdir/*.log"` 2>/dev/null) if [ $? -eq 0 ]; then # Special case for ManagerLog.out which always contain 80/81 bytes # because it outputs its thread ID even if logged are limited to >info - #echo "$lsl" | awk '/.*ManagerLog.out$/{if ($5 > 82) print $NF ": " $5} ! /.*ManagerLog.out$/ {if ($5 > 0) print $NF ": " $5}' echo "$lsl" | awk '{if ($5 > 0) print $NF ": " $5}' else echo "No log files for $c: $logdir" diff --git a/ProductionTools/castor.greplogs b/ProductionTools/installed/castor.greplogs similarity index 82% rename from ProductionTools/castor.greplogs rename to ProductionTools/installed/castor.greplogs index 493b2fc..096f716 100755 --- a/ProductionTools/castor.greplogs +++ b/ProductionTools/installed/castor.greplogs @@ -14,7 +14,7 @@ fi for conf in "$CASTOR_CONFS"; do logdir=`grep LogDir $conf | awk '{print $NF}' | tr -d "'"` - for logfile in `find $logdir -maxdepth 1 -name '*.out'`; do #-maxdepth 1 + for logfile in `find $logdir -maxdepth 1 -name '*.log.*'`; do #-maxdepth 1 grep "$TOGREP" $logfile done done diff --git a/ProductionTools/castor.logdir b/ProductionTools/installed/castor.logdir similarity index 100% rename from ProductionTools/castor.logdir rename to ProductionTools/installed/castor.logdir diff --git a/ProductionTools/castor.logsfreq b/ProductionTools/installed/castor.logsfreq similarity index 91% rename from ProductionTools/castor.logsfreq rename to ProductionTools/installed/castor.logsfreq index 7ec1fef..b295705 100755 --- a/ProductionTools/castor.logsfreq +++ b/ProductionTools/installed/castor.logsfreq @@ -26,7 +26,7 @@ for i in `seq -$(($NB_DAYS - 1)) 0`; do for conf in "$CASTOR_CONFS"; do logdir=`grep LogDir $conf | awk '{print $NF}' | tr -d "'"` - for logfile in `find $logdir -name '*.out'`; do #-maxdepth 1 + for logfile in `find $logdir -name '*.log.*'`; do #-maxdepth 1 grep "$TOGREP" $logfile | grep -c $DATE done done | awk 'BEGIN {s=0} {s+=$1} END {print s}' diff --git a/ProductionTools/castor.notrunning b/ProductionTools/installed/castor.notrunning similarity index 100% rename from ProductionTools/castor.notrunning rename to ProductionTools/installed/castor.notrunning diff --git a/ProductionTools/castor.ps b/ProductionTools/installed/castor.ps similarity index 100% rename from ProductionTools/castor.ps rename to ProductionTools/installed/castor.ps diff --git a/ProductionTools/castor.pslocal b/ProductionTools/installed/castor.pslocal similarity index 100% rename from ProductionTools/castor.pslocal rename to ProductionTools/installed/castor.pslocal diff --git a/ProductionTools/sfotzdb_intr_writer.py b/ProductionTools/installed/castor.sfotzdb_intr_writer.py old mode 100644 new mode 100755 similarity index 69% rename from ProductionTools/sfotzdb_intr_writer.py rename to ProductionTools/installed/castor.sfotzdb_intr_writer.py index 3437db7..857da17 --- a/ProductionTools/sfotzdb_intr_writer.py +++ b/ProductionTools/installed/castor.sfotzdb_intr_writer.py @@ -1,3 +1,4 @@ +#!/sw/atlas/sw/lcg/releases/LCG_101/Python/3.9.6/x86_64-centos7-gcc11-opt/bin/python -i import cx_Oracle import coral_auth user,pwd,dbn = coral_auth.get_connection_parameters_from_connection_string('oracle://int8r/ATLAS_SFO_T0') diff --git a/ProductionTools/sfotzdb_reader.py b/ProductionTools/installed/castor.sfotzdb_reader.py old mode 100644 new mode 100755 similarity index 70% rename from ProductionTools/sfotzdb_reader.py rename to ProductionTools/installed/castor.sfotzdb_reader.py index 0d48441..7eb5ff7 --- a/ProductionTools/sfotzdb_reader.py +++ b/ProductionTools/installed/castor.sfotzdb_reader.py @@ -1,3 +1,4 @@ +#!/sw/atlas/sw/lcg/releases/LCG_101/Python/3.9.6/x86_64-centos7-gcc11-opt/bin/python -i import cx_Oracle import coral_auth user,pwd,dbn = coral_auth.get_connection_parameters_from_connection_string('oracle://atonr_conf/ATLAS_SFO_T0_R') diff --git a/ProductionTools/sfotzdb_writer.py b/ProductionTools/installed/castor.sfotzdb_writer.py old mode 100644 new mode 100755 similarity index 70% rename from ProductionTools/sfotzdb_writer.py rename to ProductionTools/installed/castor.sfotzdb_writer.py index 2bb7f6e..d4c4717 --- a/ProductionTools/sfotzdb_writer.py +++ b/ProductionTools/installed/castor.sfotzdb_writer.py @@ -1,3 +1,4 @@ +#!/sw/atlas/sw/lcg/releases/LCG_101/Python/3.9.6/x86_64-centos7-gcc11-opt/bin/python -i import cx_Oracle import coral_auth user,pwd,dbn = coral_auth.get_connection_parameters_from_connection_string('oracle://atonr_conf/ATLAS_SFO_T0') diff --git a/ProductionTools/installed/castor.signal b/ProductionTools/installed/castor.signal new file mode 100755 index 0000000..e768e1e --- /dev/null +++ b/ProductionTools/installed/castor.signal @@ -0,0 +1,9 @@ +#!/bin/bash + +pid=`ps -elf | grep Castor | grep python | grep -v grep | awk '{print \$4}'` +if [ "x$pid" == x ] ; then + echo "cannot send signal: castor script is not running" + exit -1 +else + kill -s $1 $pid +fi diff --git a/ProductionTools/castor.starttime b/ProductionTools/installed/castor.starttime similarity index 100% rename from ProductionTools/castor.starttime rename to ProductionTools/installed/castor.starttime diff --git a/ProductionTools/castor.stopall b/ProductionTools/installed/castor.stopall similarity index 56% rename from ProductionTools/castor.stopall rename to ProductionTools/installed/castor.stopall index 74ba118..dda4758 100755 --- a/ProductionTools/castor.stopall +++ b/ProductionTools/installed/castor.stopall @@ -1,5 +1,5 @@ #!/usr/bin/env bash for c in `cat castor.machines`; do echo "$c" - ssh -x $c "~flegoff/castor_tools/signal-script.sh 12" + ssh -x $c "/sw/castor/tools/castor.signal 12" done diff --git a/ProductionTools/generate_instance_lists.sh b/ProductionTools/installed/castor.updateconflist similarity index 100% rename from ProductionTools/generate_instance_lists.sh rename to ProductionTools/installed/castor.updateconflist diff --git a/ProductionTools/oracle_db_disaster_recovery/oracle_db_disaster_recovery.py b/ProductionTools/oracle_db_disaster_recovery/oracle_db_disaster_recovery.py index 1afb68d..7878d3c 100644 --- a/ProductionTools/oracle_db_disaster_recovery/oracle_db_disaster_recovery.py +++ b/ProductionTools/oracle_db_disaster_recovery/oracle_db_disaster_recovery.py @@ -103,7 +103,8 @@ if not args.dryrun: f'/mnt/daq_area_rw/castor/pc-tdq-sfo-{i}/atlascdr/prod.stopped') if args.verbose: print(f'disabled CS on sfo-{i}') if castorscript_is_running(f'pc-tdq-sfo-{i}', 'prod.cfg'): - stop_cs_cmd = ['ssh', '-x', f'pc-tdq-sfo-{i}', '~/signal-script.sh', '12'] + stop_cs_cmd = ['ssh', '-x', f'pc-tdq-sfo-{i}', + '/sw/castor/tools/castor.signal', '12'] subprocess.run(stop_cs_cmd, check=True) if args.verbose: print(f'sent stop signal to CS on sfo-{i}: waiting for CS to stop') while castorscript_is_running(f'pc-tdq-sfo-{i}', 'prod.cfg'): diff --git a/point1.deployment.sh b/point1.deployment.sh index c6064fe..c149139 100755 --- a/point1.deployment.sh +++ b/point1.deployment.sh @@ -32,6 +32,8 @@ fi # DB disaster recovery are deployed as part of the CastorScript mv ProductionTools/oracle_db_disaster_recovery . || err "cannot mv oracle_db_disaster_recovery" +# operation tools +mv ProductionTools/installed tools || err "cannot mv installed tools" # remove from local directory what we don't want to be deployed rm -rf Configs DeploymentTest ProductionTools UnitTests .git || err "cannot delete" -- GitLab