Skip to content
Snippets Groups Projects
Commit bf86df06 authored by Fabrice Le Goff's avatar Fabrice Le Goff
Browse files

oracle db recovery: some fixes

parent 771a9940
No related branches found
No related tags found
No related merge requests found
......@@ -32,7 +32,7 @@ def oracle_check_closed_ondisk(oradb_url, run_number, outliers_file,
ora_state_3 = oradb.db.curs.execute("""select SFOID, SFOPFN from SFO_TZ_FILE
where RUNNR=:runnr and FILESTATE='CLOSED' and TRANSFERSTATE='ONDISK'
""", runnr=run_number).fetchall()
and SFOID like 'SFO%'""", runnr=run_number).fetchall()
if verbose: print(f'Found {len(ora_state_3)} (closed,ondisk)')
with open(outliers_file, 'w') as outf:
......
......@@ -33,7 +33,7 @@ def oracle_check_closed_transferred(oradb_url, run_number, outliers_file,
if report: reportf = open(report, 'a')
ora_state_4 = oradb.db.curs.execute("""select SFOID, SFOPFN from SFO_TZ_FILE
where RUNNR=:runnr and FILESTATE='CLOSED'
where RUNNR=:runnr and FILESTATE='CLOSED' and SFOID like 'SFO%'
and TRANSFERSTATE='TRANSFERRED'""", runnr=run_number).fetchall()
if verbose: print(f'Found {len(ora_state_4)} (closed,transferred)')
......
......@@ -114,7 +114,7 @@ for i in args.sfos:
if not os.path.exists(f'sfo-{i}.{args.run_number}.sqlite'):
scp_cmd = ['scp', f'pc-tdq-sfo-{i}:/dsk1/sqlite/sfo-sqlite-{args.run_number}.db',
f'sfo-{i}.{args.run_number}.sqlite']
subprocess.run(scp_cmd, check=True)
subprocess.run(scp_cmd, check=True, stdout=subprocess.PIPE)
if args.verbose: print(f'downloaded sqlite: sfo-{i}.{args.run_number}.sqlite')
if args.verbose: print('recovery step A: bring state 1 (missing) and 2 (opened) to 3 (closed)')
......@@ -134,6 +134,10 @@ oracle_check_closed_ondisk(args.oradb_url, args.run_number, args.outliers_file,
verbose=args.verbose, dryrun=args.dryrun, report=args.report)
if os.path.exists(args.outliers_file) and os.path.getsize(args.outliers_file) != 0:
print(f'there are some outliers: check file "{args.outliers_file}"')
print('CastorScript instances NOT restarted: re-enable them after having'
' taken care of outliers')
else:
if args.verbose: print('no outliers')
if args.verbose: print('starting CastorScript instances')
if not args.dryrun:
......@@ -141,7 +145,3 @@ if os.path.exists(args.outliers_file) and os.path.getsize(args.outliers_file) !=
os.rename(f'/mnt/daq_area_rw/castor/pc-tdq-sfo-{i}/atlascdr/prod.stopped',
f'/mnt/daq_area_rw/castor/pc-tdq-sfo-{i}/atlascdr/prod.cfg')
if args.verbose: print(f're-enabled CS on sfo-{i}')
else:
print(f'there are some outliers: check file "{args.outliers_file}"')
print('CastorScript instances NOT restarted: re-enable them after having'
' taken care of outliers')
#!/bin/env python
import os.path
import sys
if __name__ == "__main__":
    # Only when run as a standalone script: the package root is three levels
    # up, so prepend it to sys.path before the 'cs' import below.  When this
    # module is imported by the package itself, the path is already correct.
    from os.path import dirname, abspath, join
    # add /../../CastorScript/Script to path so imports keeps working
    SCRIPT_DIR = abspath(join(dirname(__file__), '..','..',".."))
    sys.path.append(SCRIPT_DIR)
from cs.Tools.FilenameParsers.SFOFileNameParser import SFOFileNameParser
# Root of the Tier-0 permanent RAW-data area on CASTOR; all merged datasets
# live under this prefix (see MergedMigrated for the layout).
BASEDIR = '/castor/cern.ch/grid/atlas/tzero/prod1/perm/'
def MergedMigrated(castorfile, stager, pool, backend, logger=None, verbose=False):
    """
    Return True if the merged counterpart of *castorfile* exists in the
    Tier-0 dataset directory and is migrated to tape, False otherwise.

    Parameters:
      castorfile -- full CASTOR path of the (unmerged) SFO file
      stager, pool -- CASTOR stager/pool handed through to the backend
      backend -- storage-backend module exposing listdir() and migrated()
      logger -- optional logger passed to the backend calls
      verbose -- if True, print the dataset directory being searched

    Destination directory:
    /<basedir>/<projecttag>/<streamtype_streamname>/<runnr-7digit>/<dataset>/
    basedir = /castor/cern.ch/grid/atlas/tzero/prod1/perm/
    projecttag = data09_cos
    dataset = data09_cos.00122050.physics_IDCosmic.merge.RAW
    Merged file name is:
    1) if all SFO files of a LB fit into a single file
    <project>.<runnr>.<stream>.daq.RAW._lbXXXX._SFO-N._<fileseqnr>.data, N=4,5,9,10
    --> <project>.<runnr>.<stream>.merge.RAW._lbXXXX._SFO-ALL._0001.<attemptnr>
    2) if all SFO files of a LB don't fit into a single file
    <project>.<runnr>.<stream>.daq.RAW._lbXXXX._SFO-N._<fileseqnr>.data, N=4,5,9,10
    --> <project>.<runnr>.<stream>.merge.RAW._lbXXXX._SFO-N._<fileseqnr>.<attemptnr>, N=4,5,9,10
    """
    ### Build the target dataset directory name from the parsed SFO file name
    parsed = SFOFileNameParser(os.path.basename(castorfile))
    dataset = '%s.%s.%s_%s.merge.RAW' \
        % (parsed.ProjectTag(), parsed.RunNr(),
           parsed.StreamType(), parsed.StreamName())
    path = os.path.join(BASEDIR, parsed.ProjectTag(),
                        '%s_%s' % (parsed.StreamType(), parsed.StreamName()),
                        str(parsed.RunNr()), dataset)
    if verbose:
        print(path)
    ### Look for files in the dataset dir
    success, all_files = backend.listdir(path, stager, pool, logger)
    ### If the listing fails we cannot confirm migration
    if not success: return False
    ### Build the two name patterns we expect (neglecting attempt number):
    ### case 2 keeps the original _SFO-N._<seqnr> stem (with .daq. -> .merge.),
    ### case 1 replaces the _SFO-N._<seqnr> tail with _SFO-ALL._0001
    notmerged = os.path.splitext(os.path.basename(castorfile))[0]
    notmerged = notmerged.replace('.daq.', '.merge.')
    merged = '.'.join(notmerged.split('.', 6)[:-1] + ['_SFO-ALL._0001'])
    ### Keep only files matching either expected name
    files = [f for f in all_files
             if f and (notmerged in f or merged in f)]
    if not files: return False
    ### Take the file with the highest attempt number (trailing .<attemptnr>);
    ### max() with a numeric key avoids the decorate-sort-index dance and the
    ### lexicographic pitfall of comparing attempt numbers as strings
    filename = max(files, key=lambda f: int(f.rsplit('.', 1)[1]))
    ### Check the migration status for that file
    return backend.migrated(os.path.join(path, filename),
                            stager, pool, logger)
if __name__ == '__main__':
    # CLI driver: MergedMigrated(<castorfile>, <stager>, <pool>) with the
    # CASTOR storage backend; prints True/False.
    import importlib
    backend_module = importlib.import_module("cs.StorageBackends.castorstorage")
    print(MergedMigrated(sys.argv[1], sys.argv[2], sys.argv[3],
                         backend_module, verbose=True))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment