Commit de6eb9d5 authored by Graeme Stewart's avatar Graeme Stewart
Browse files

Large refactoring of AmiLib to reduce cut'n'paste code, more robust error

reporting, throw proper exceptions.
Prevent AthFileLite from running in AthenaMP mode (PyUtils-00-14-07)

	* python/AmiLib.py
	- Remove commented out code
	* Tagging as PyUtils-00-14-07

2014-12-18 Graeme Stewart <graeme.andrew.stewart@cern.ch>
	* python/AmiLib.py
	- Another big refactoring:
	- badresult improved and now returns a boolean and an error message
	- Rename dry_run to dryrun for consistency
	- Introduce PyUtilsAMIException exception, which now gets thrown
	  consistently if there are problems
	- Consolidate the functions of get_project_of_pkg, get_version_of_pkg
	  and tc_submit_tag._get_projects into single method get_pkg_info
	- Rewrite get_version_of_pkg_with_deps to use module utilities
	- Simplify find_pkg to remove callback function
	* python/scripts/tc_find_pkg.py
	* python/scripts/tc_find_tag.py
	- Embed script in try: except PyUtilsAMIException:
	* python/scripts/tc_show_clients.py
	- Update to get_pkg_info (N.B. Script is still non-functional and disabled)
...
(Long ChangeLog diff - truncated)
parent 70207396
#!/usr/bin/env python
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
# @file: checkMetaSG.py
# @purpose: Print the metadata that would be accessible via the IOVMetaDataContainers of the MetaDataStore
# @author: Will Buttinger <will@cern.ch>
# @date: Oct 2014
#
# @example:
# @code
# checkMetaSG.py aod.pool.root
# @endcode
#
__version__ = "$Revision: 621253 $"
__author__ = "Will Buttinger <will@cern.ch>"
import sys
import os
from optparse import OptionParser
if __name__ == "__main__":
    # Print, for every input file, a table of the metadata folders/keys that
    # AthFile reports for it.  The exit code is 0 on success, 1 if a regular
    # exception was caught for any file, and 10 for anything else.
    parser = OptionParser(usage="usage: %prog [options] [-f] my.file.pool")
    parser.add_option("-f",
                      "--file",
                      dest="fileName",
                      help="The path to the POOL file to analyze")
    parser.add_option("-o",
                      "--output",
                      dest="outFileName",
                      default=None,
                      help="Name of the output file which will contain the informations gathered during checkSG processing. These informations will be stored into a python-shelve or an ASCII/py file (depending on the extension: .pkl,.dat -> shelve; everything else -> ASCII/py)")
    (options, args) = parser.parse_args()

    # Positional arguments are file names.  startswith() (rather than arg[0])
    # keeps an empty-string argument from raising IndexError; empty arguments
    # are simply dropped.
    fileNames = [arg for arg in args if arg and not arg.startswith("-")]

    if options.fileName is None and not fileNames:
        parser.print_help()
        sys.exit(1)

    if options.fileName is not None:
        fileName = os.path.expandvars(os.path.expanduser(options.fileName))
        fileNames.append(fileName)

    # Process each distinct file only once.
    fileNames = set(fileNames)
    sc = 0
    row_fmt = "%30s%s%-25s%s%-7s%s%-30s"
    for fileName in fileNames:
        try:
            # Imported inside the try block so that a broken PyUtils setup is
            # reported through the same error path as a bad input file.
            from PyUtils import AthFile
            print("## checking [%s]..." % fileName)
            metadata = AthFile.fopen(fileName).fileinfos['metadata']
            print("=" * 91)
            print("%30s%-28s%-10s%-30s" % ("folder", " | key ", " | type ", " | value"))
            print(row_fmt % ("-" * 30, "-+-", "-" * (28 - 3), "-+-", "-" * (10 - 3), "-+-", "-" * 20))
            # metaObj may be dict, list (occurs with multi IOV), or none...
            # so far only dict is supported. FIXME
            for metaFolder, metaObj in metadata.items():
                if not isinstance(metaObj, dict):
                    continue
                first = True
                for metaKey, metaValue in metaObj.items():
                    # The folder name is printed on the first row of each folder only.
                    label = metaFolder if first else ""
                    print(row_fmt % (label, " | ", metaKey, " | ",
                                     type(metaValue).__name__, " | ", metaValue))
                    first = False
            print("=" * 91)

            if options.outFileName:
                outFileName = os.path.expandvars(os.path.expanduser(options.outFileName))
                print("## saving checkSG report into [%s]..." % outFileName)
                # NOTE: only the shelve output is implemented; for any other
                # extension nothing is written, despite what --help says.
                if os.path.splitext(outFileName)[1] in ('.pkl', '.dat'):
                    # we explicitly import 'bsddb' to try to always
                    # get that particular backend for the shelve...
                    import bsddb  # noqa: F401
                    import shelve
                    if os.path.exists(outFileName):
                        os.remove(outFileName)
                    db = shelve.open(outFileName)
                    try:
                        # The original stored the undefined name 'ks' here,
                        # which always raised NameError; store what this
                        # script actually collected.
                        db['metadata'] = metadata
                    finally:
                        db.close()
        except Exception as e:
            print("## Caught exception [%s] !!" % str(e.__class__))
            print("## What: %s" % (e,))
            print(sys.exc_info()[0])
            print(sys.exc_info()[1])
            sc = 1
        except:  # deliberately broad: report anything not derived from Exception
            print("## Caught something !! (don't know what)")
            print(sys.exc_info()[0])
            print(sys.exc_info()[1])
            sc = 10
        if len(fileNames) > 1:
            print("")
    # end of loop over fileNames

    print("## Bye.")
    sys.exit(sc)
......@@ -2,14 +2,14 @@
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
# $Id: checkxAOD.py 592348 2014-04-10 12:06:41Z krasznaa $
# $Id: checkxAOD.py 619905 2014-10-03 16:11:12Z krasznaa $
#
# This is a modified version of PyUtils/bin/checkFile.py. It has been taught
# how to sum up the sizes of all the branches belonging to a single xAOD
# object/container.
#
__version__ = "$Revision: 592348 $"
__version__ = "$Revision: 619905 $"
__author__ = "Sebastien Binet <binet@cern.ch>, " \
"Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>"
......@@ -27,6 +27,10 @@ if __name__ == "__main__":
"--file",
dest = "fileName",
help = "The path to the POOL file to analyze" )
p( "-c",
"--csv",
dest = "csvFileName",
help = "Output CSV file name, to use with spreadsheets" )
( options, args ) = parser.parse_args()
fileNames = []
......@@ -46,6 +50,12 @@ if __name__ == "__main__":
fileNames = set( fileNames )
# Check the consistency with the CSV output:
if len( fileNames ) > 1 and options.csvFileName:
print( "WARNING CSV output is only available when processing a single "
"input file" )
pass
# Loop over the specified file(s):
for fileName in fileNames:
......@@ -159,6 +169,27 @@ if __name__ == "__main__":
( memSize, diskSize, "Total" ) )
print( "=" * 80 )
# Write out a CSV file if one was requested:
if options.csvFileName and ( len( fileNames ) == 1 ):
# Open the output file:
import csv
with open( options.csvFileName, "wb" ) as f:
writer = csv.writer( f )
# Set up the formatting of the file:
writer.writerow( [ "Name (Type)", "Size/Evt" ] )
# Write all entries to it:
for d in orderedData:
# Skip metadata items:
if d.nEntries != poolFile.dataHeader.nEntries: continue
# Construct the name of the entry:
nameType = "%s (%s)" % \
( d.name, ttree.GetBranch( d.name ).GetClassName() )
# Write the entry:
writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
pass
pass
pass
if len(fileNames) > 1:
print ""
pass # loop over fileNames
......
#! /usr/bin/env python
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
## Simple wrapper to invoke AthFileLite metadata grabber and
# produce AthFile-like text output
#
# $Id: dumpAthfilelite.py 618684 2014-09-26 11:46:14Z graemes $
import argparse
import os
import pprint
import sys
import PyUtils.AthFileLite as AthFileLite
def main():
    """Dump the AthFileLite metadata of every file given on the command line.

    The mandatory --type/-t option (POOL, BS or TAG) selects which
    AthFileLite reader class is used; the metadata of each input file is
    pretty-printed between rulers of '=' characters.
    """
    cli = argparse.ArgumentParser(description="Use AthFileLite interface to retrieve file metadata. "
                                  "Note that the '--type' argument is mandatory as AthFileLite will "
                                  "not even try to guess the type of file.")
    cli.add_argument('--type', '-t', metavar='FILETYPE',
                     help="Specifie filetype: POOL, BS or TAG", required=True,
                     choices=("POOL", "BS", "TAG"))
    cli.add_argument('input_files', nargs="+", help="Input files")
    opts = cli.parse_args(sys.argv[1:])

    # File type -> name of the reader class in AthFileLite.  The class is
    # looked up per file so that only the selected reader is ever touched.
    reader_names = {"POOL": "AthPoolFile", "BS": "AthBSFile", "TAG": "AthTagFile"}
    ruler = "=" * 80

    for filename in opts.input_files:
        reader = getattr(AthFileLite, reader_names[opts.type])
        afl = reader(filename)
        metadata = afl.fileinfo

        print(ruler)
        print(filename)
        print(ruler)
        pprint.pprint(metadata)
        print(ruler)


if __name__ == "__main__":
    main()
......@@ -18,6 +18,7 @@ alias checkxAOD checkxAOD.py
alias diffPoolFiles diffPoolFiles.py
alias merge-poolfiles merge-poolfiles.py
alias checkTag checkTag.py
alias checkMetaSG checkMetaSG.py
alias setupWorkArea setupWorkArea.py
alias pyroot pyroot.py
alias print_auditor_callgraph print_auditor_callgraph.py
......@@ -53,6 +54,7 @@ apply_pattern declare_scripts files="\
checkFile.py \
checkPlugins.py \
checkSG.py \
checkMetaSG.py \
checkTP.py \
checkTag.py \
checkxAOD.py \
......@@ -65,7 +67,8 @@ apply_pattern declare_scripts files="\
dlldep.py \
dso-stats.py \
dump-athfile.py \
filter-and-merge-d3pd.py \
dumpAthfilelite.py \
filter-and-merge-d3pd.py \
gen-typereg-dso.py \
gen_klass.py \
get-tag-diff.py \
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
from __future__ import with_statement
__version__ = "$Revision: 588873 $"
__version__ = "$Revision: 635800 $"
__author__ = "Sebastien Binet"
__doc__ = "implementation of AthFile-server behind a set of proxies to isolate environments"
......@@ -393,8 +393,10 @@ class AthFileServer(object):
def _root_open(self, fname):
import PyUtils.Helpers as H
# speed-up by tampering LD_LIBRARY_PATH to not load reflex-dicts
import re
with H.restricted_ldenviron(projects=['AtlasCore']):
import re, os
restrictedProjects = ['AtlasCore']
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
with H.ShutUp(filters=[
re.compile(
'TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
......@@ -482,7 +484,7 @@ class AthFileServer(object):
use_cache = False
sync_cache = True
if protocol in ('', 'file'):
if protocol in ('', 'file') :
fid = self.md5sum(fname)
fid_in_cache = fid in cache
# also check the cached name in case 2 identical files
......@@ -1023,7 +1025,10 @@ class FilePeeker(object):
def _root_open(self, fname, raw=False):
import PyUtils.Helpers as H
with H.restricted_ldenviron(projects=['AtlasCore']):
restrictedProjects = ['AtlasCore']
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
root = self.pyroot
import re
with H.ShutUp(filters=[
......@@ -1061,7 +1066,10 @@ class FilePeeker(object):
runs=[]
evts=[]
import PyUtils.Helpers as H
with H.restricted_ldenviron(projects=['AtlasCore']):
restrictedProjects = ['AtlasCore']
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
root = self.pyroot
do_close = True
if isinstance(fname, basestring):
......@@ -1106,9 +1114,15 @@ class FilePeeker(object):
for row in xrange(evtmax):
if coll_tree.GetEntry(row) < 0:
break
runnbr = coll_tree.RunNumber
# With root 5.34.22, trying to access leaves of a
# fundamental type like this gives an error:
# TypeError: attempt to bind ROOT object w/o class
# Rewrite like this for now to work around the problem.
#runnbr = coll_tree.RunNumber
runnbr = coll_tree.GetBranch('RunNumber').GetListOfLeaves()[0].GetValueLong64()
runs.append(runnbr)
evtnbr = coll_tree.EventNumber
#evtnbr = coll_tree.EventNumber
evtnbr = coll_tree.GetBranch('EventNumber').GetListOfLeaves()[0].GetValueLong64()
evts.append(evtnbr)
del coll_tree
if f and do_close:
......@@ -1119,7 +1133,10 @@ class FilePeeker(object):
def _is_empty_pool_file(self, fname):
is_empty = False
import PyUtils.Helpers as H
with H.restricted_ldenviron(projects=['AtlasCore']):
restrictedProjects = ['AtlasCore']
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
root = self.pyroot
do_close = True
if isinstance(fname, basestring):
......@@ -1140,6 +1157,8 @@ class FilePeeker(object):
return is_empty
def _process_call(self, fname, evtmax, projects=['AtlasCore']):
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): projects=[] #special case for athanalysisbase
msg = self.msg()
import PyUtils.Helpers as H
f = _create_file_infos()
......@@ -1170,7 +1189,9 @@ class FilePeeker(object):
file_name,]
subprocess.call(cmd, env=self._sub_env)
#
with H.restricted_ldenviron(projects=None):
#with H.restricted_ldenviron(projects=None):
# MN: disabled clean environ to let ROOT6 find headers
if True:
is_tag, tag_ref, tag_guid, nentries, runs, evts = self._is_tag_file(f_root, evtmax)
if is_tag:
f['stream_names'] = ['TAG']
......@@ -1187,6 +1208,9 @@ class FilePeeker(object):
os.close(fd_pkl)
if os.path.exists(out_pkl_fname):
os.remove(out_pkl_fname)
print "\n --------- runnign Athena peeker"
print os.environ['CMTPATH']
import AthenaCommon.ChapPy as api
app = api.AthenaApp(cmdlineargs=["--nprocs=0"])
app << """
......
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
# Lightweight and simplified version of AthFile
# As the transform knows which files are bytestream and which are
# POOL files, we just have two simple classes and definitely avoid
# doing anything fancy here
import os
import os.path
import re
import subprocess
import sys
import uuid
import PyUtils.dbsqlite as dbsqlite
from PyCmt.Logging import msg, logging
def _create_file_info_template():
"""simple helper function to create consistent dicts for the
fileinfos attribute of AthFile
"""
d = {
'file_md5sum': None,
'file_name': None,
'file_size': None,
'file_type': None,
'file_guid': None,
'nentries' : 0, # to handle empty files
'run_number': [],
'run_type': [],
'evt_type': [],
'evt_number': [],
'lumi_block': [],
'beam_energy': [],
'beam_type': [],
'stream_tags': [],
'metadata_items': None,
'eventdata_items': None,
'stream_names': None,
'geometry': None,
'conditions_tag': None,
'det_descr_tags': None,
##
'metadata': None,
'tag_info': None,
}
return d
def _urlType(filename):
if filename.startswith('dcap:'):
return 'dcap'
if filename.startswith('root:'):
return 'root'
if filename.startswith('rfio:'):
return 'rfio'
if filename.startswith('file:'):
return 'posix'
return 'posix'
def _get_file_size(filename):
    """Return the size of *filename* in bytes, or None if it cannot be read.

    Names classified as 'posix' by _urlType are sized with os.stat; all
    other names are opened through ROOT's TFile.Open with the
    '?filetype=raw' option and sized with TFile.GetSize.
    """
    if _urlType(filename) == 'posix':
        try:
            return os.stat(filename).st_size
        except (OSError, IOError):
            # os.stat reports failures as OSError, which in Python 2 is not
            # a subclass of IOError, so both have to be listed.
            return None

    from PyUtils.RootUtils import import_root
    root = import_root()
    fsize = None
    try:
        msg.debug('Calling TFile.Open for {0}'.format(filename))
        rfile = root.TFile.Open(filename + '?filetype=raw', 'READ')
        fsize = rfile.GetSize()
        msg.debug('Got size {0} from TFile.GetSize'.format(fsize))
    except ReferenceError:
        # Presumably raised by PyROOT when TFile.Open handed back a null
        # file.  There is nothing to close in that case: calling Close()
        # here (as the code used to) would only raise again.
        msg.error('Failed to get size of {0}'.format(filename))
        fsize = None
    else:
        rfile.Close()
    return fsize
class AthPoolFile(object):
    """Collect the metadata of a POOL file by running a small athena job.

    Constructing an instance writes a job-options file, runs 'athena.py' on
    it with output redirected to a log file, and loads the 'fileinfos' entry
    from the database the job produced.  The three scratch files are named
    after the input file plus a random UUID and are removed when the
    instance is destroyed, unless an error occurred or the AFDEBUG
    environment variable is set.
    """

    def __init__(self, filename):
        """Set up the scratch file names for *filename* and run the extraction."""
        self._filename = filename
        if self._filename:
            self._stub = os.path.basename(self._filename) + '-' + str(uuid.uuid4())
        else:
            self._stub = str(uuid.uuid4())
        self._jobOptionsFile = self._stub + '-miniAthFile.py'
        self._infoOutputFile = self._stub + '-miniAthFile.db'
        self._logFile = self._stub + '-miniAthFile.log'

        self._metadata = _create_file_info_template()
        # Set as soon as any step fails, so that __del__ keeps the scratch files.
        self._error = False
        self.fopen()

    def fopen(self):
        """Run the athena job and fill in the metadata, file type and file size."""
        self._writeMiniJobOptions()
        self._runMiniAthena()
        self._loadFileInfo()
        self._metadata['file_type'] = 'pool'
        self._metadata['file_size'] = _get_file_size(self._filename)

    @property
    def fileinfo(self):
        """The metadata dictionary collected for the file."""
        return self._metadata

    def _writeMiniJobOptions(self):
        """Write the job options that run the athfile peeker on the input file.

        Any exception is reported on stderr, flagged in self._error and
        re-raised.
        """
        template = "\n".join(("FNAME=['{filename}']",
                              "import os",
                              "os.environ['ATHENA_PROC_NUMBER'] = '0'",
                              "os.environ.pop('PYTHONINSPECT', None)",
                              "include('AthenaPython/athfile_peeker.py')",
                              "from AthenaCommon.AlgSequence import AlgSequence",
                              "job = AlgSequence()",
                              "job.peeker.outfname='{picklename}'",
                              "job.peeker.infname=FNAME[0]",
                              "import IOVDbSvc.IOVDb",
                              "theApp.EvtMax = 1"))
        try:
            # 'with' guarantees the file is flushed and closed before athena
            # reads it, instead of relying on garbage collection.
            with open(self._jobOptionsFile, "w") as jo:
                jo.write(template.format(filename=self._filename,
                                         picklename=self._infoOutputFile) + "\n")
        except Exception as e:
            sys.stderr.write("Exception raised when writing JO file: {0}\n".format(e))
            self._error = True
            raise

    def _runMiniAthena(self):
        """Run athena.py on the job options, sending stdout and stderr to the log file.

        Raises subprocess.CalledProcessError if athena exits with a non-zero
        status, or OSError if it cannot be started.
        """
        with open(self._logFile, 'wb') as out:
            try:
                athenv = os.environ.copy()
                athenv["ATHENA_PROC_NUMBER"] = "0"  # Suppress AthenaMP running
                subprocess.check_call(['athena.py', self._jobOptionsFile],
                                      stdout=out, stderr=out, env=athenv)
            except (subprocess.CalledProcessError, OSError):
                # Don't delete log files if errors occurred.  OSError covers
                # the case where athena.py cannot be launched at all.
                self._error = True
                raise

    def _loadFileInfo(self):
        """Load the 'fileinfos' entry from the database written by the athena job."""
        try:
            db = dbsqlite.open(self._infoOutputFile)
            self._metadata = db['fileinfos']
        except Exception:
            # Keep the job options and the log around for debugging.
            self._error = True
            raise

    def _getSize(self):
        """Store the size of the input file, as reported by os.stat, in the metadata."""
        # FIXME Probably need to use ROOT for non-posix fs
        try:
            self._metadata['file_size'] = os.stat(self._filename).st_size
        except (OSError, IOError):
            # os.stat raises OSError, which is distinct from IOError in Python 2.
            self._metadata['file_size'] = None

    def __del__(self):
        """Remove the scratch files unless debugging was requested or an error occurred."""
        if ('AFDEBUG' not in os.environ) and (not self._error):
            for fname in (self._jobOptionsFile, self._infoOutputFile, self._logFile):
                try:
                    os.unlink(fname)
                except (OSError, IOError):
                    # Best effort: a file that was never created is fine.
                    pass
class AthBSFile(object):
def __init__(self, filename):
self._filename = filename
self._metadata = _create_file_info_template()
self.fopen()
def fopen(self):
self._process_bs_file(self._filename)
self._metadata['file_type'] = 'bs'
self._metadata['file_size'] = _get_file_size(self._filename)
@property
def fileinfo(self):
return self._metadata
def _process_bs_file (self, fname, evtmax=1, full_details=True):
import eformat as ef
data_reader = ef.EventStorage.pickDataReader(fname)
assert data_reader, \
'problem picking a data reader for file [%s]'%fname
beam_type = '<beam-type N/A>'
try:
beam_type = data_reader.beamType()
except Exception,err:
msg.warning ("problem while extracting beam-type information")
beam_energy = '<beam-energy N/A>'
try:
beam_energy = data_reader.beamEnergy()
except Exception,err:
msg.warning ("problem while extracting beam-type information")
bs = ef.istream(fname)
self._metadata['nentries'] = bs.total_events
bs_metadata = {}
for md in data_reader.freeMetaDataStrings():
if md.startswith('Event type:'):
k = 'evt_type'
v = []
if 'is sim' in md: v.append('IS_SIMULATION')
else: v.append('IS_DATA')
if 'is atlas' in md: v.append('IS_ATLAS')
else: v.append('IS_TESTBEAM')
if 'is physics' in md: v.append('IS_PHYSICS')
else: v.append('IS_CALIBRATION')
bs_metadata[k] = tuple(v)
elif md.startswith('GeoAtlas:'):
k = 'geometry'
v = md.split('GeoAtlas:')[1].strip()
bs_metadata[k] = v
elif md.startswith('IOVDbGlobalTag:'):
k = 'conditions_tag'
v = md.split('IOVDbGlobalTag:')[1].strip()
bs_metadata[k] = v
elif '=' in md:
k,v = md.split('=')
bs_metadata[k] = v
# for bwd/fwd compat...
# see: https://savannah.cern.ch/bugs/?73208
# needed for very old BS
for key_name,fn_name in (
('GUID','GUID'),
('Stream','stream'),
('Project', 'projectTag'),
('LumiBlock', 'lumiblockNumber'),
('run_number', 'runNumber'),
):
if key_name in bs_metadata:
# no need: already in bs metadata dict
continue
if hasattr(data_reader, fn_name):
bs_metadata[key_name] = getattr(data_reader, fn_name