Commit 238a61c5 authored by Graeme Stewart's avatar Graeme Stewart
Browse files

2014-11-07 Graeme Stewart <graeme.andrew.stewart@cern.ch>

	* AthFileLite.py
	- Fix __init__ for AthTagFile to take filename argument
	* Tagging as PyUtils-00-13-26-01 (PyUtils-00-13-26-01)
parent 069a6aec
#!/usr/bin/env python
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
# @file: checkMetaSG.py
# @purpose: Print the metadata that would be accessible via the IOVMetaDataContainers of the MetaDataStore
# @author: Will Buttinger <will@cern.ch>
# @date: Oct 2014
#
# @example:
# @code
# checkMetaSG.py aod.pool.root
# @endcode
#
__version__ = "$Revision: 621253 $"
__author__ = "Will Buttinger <will@cern.ch>"
import sys
import os
from optparse import OptionParser
if __name__ == "__main__":
parser = OptionParser(usage="usage: %prog [options] [-f] my.file.pool")
parser.add_option( "-f",
"--file",
dest = "fileName",
help = "The path to the POOL file to analyze" )
parser.add_option( "-o",
"--output",
dest = "outFileName",
default = None,
help = "Name of the output file which will contain the informations gathered during checkSG processing. These informations will be stored into a python-shelve or an ASCII/py file (depending on the extension: .pkl,.dat -> shelve; everything else -> ASCII/py)" )
(options, args) = parser.parse_args()
fileNames = []
if len(args) > 0:
fileNames = [ arg for arg in args if arg[0] != "-" ]
pass
if options.fileName == None and len(fileNames) == 0:
str(parser.print_help() or "")
sys.exit(1)
if not (options.fileName is None):
fileName = os.path.expandvars(os.path.expanduser(options.fileName))
fileNames.append(fileName)
fileNames = set( fileNames )
sc = 0
for fileName in fileNames:
try:
from PyUtils import AthFile
print "## checking [%s]..."%fileName
metadata = AthFile.fopen(fileName).fileinfos['metadata']
print "="*91
print "%30s%-28s%-10s%-30s" % ("folder", " | key "," | type "," | value")
print "%30s%s%-25s%s%-7s%s%-30s" % ("-"*30, "-+-", "-"*(28-3),"-+-","-"*(10-3),"-+-","-"*(20))
for metaFolder,metaObj in metadata.items(): #metaObj may be dict, list (occurs with multi IOV), or none... so far only support dict FIXME
first=True
if isinstance(metaObj,dict):
for metaKey,metaValue in metaObj.items():
if first: print "%30s%s%-25s%s%-7s%s%-30s" % (metaFolder, " | ", metaKey," | ",type(metaValue).__name__," | ",metaValue) #print "%30s%s%-30s" % (metaFolder, " | ",metaKey+" = "+str(metaValue) )
else: print "%30s%s%-25s%s%-7s%s%-30s" % ("", " | ", metaKey," | ",type(metaValue).__name__," | ",metaValue)
first=False
print "="*91
if options.outFileName:
osp = os.path
outFileName = options.outFileName
outFileName = osp.expanduser(outFileName)
outFileName = osp.expandvars(outFileName)
print "## saving checkSG report into [%s]..." % outFileName
if os.path.splitext(outFileName)[1] in ('.pkl', '.dat'):
# we explicitely import 'bsddb' to try to always
# get that particular backend for the shelve...
import bsddb
import shelve
if os.path.exists(outFileName):
os.remove(outFileName)
db = shelve.open(outFileName)
db['eventdata_items'] = ks
db.close()
except Exception, e:
print "## Caught exception [%s] !!" % str(e.__class__)
print "## What:",e
print sys.exc_info()[0]
print sys.exc_info()[1]
sc = 1
pass
except :
print "## Caught something !! (don't know what)"
print sys.exc_info()[0]
print sys.exc_info()[1]
sc = 10
pass
if len(fileNames) > 1:
print ""
pass # loop over fileNames
print "## Bye."
sys.exit(sc)
......@@ -2,14 +2,14 @@
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
# $Id: checkxAOD.py 592348 2014-04-10 12:06:41Z krasznaa $
# $Id: checkxAOD.py 619905 2014-10-03 16:11:12Z krasznaa $
#
# This is a modified version of PyUtils/bin/checkFile.py. It has been taught
# how to sum up the sizes of all the branches belonging to a single xAOD
# object/container.
#
__version__ = "$Revision: 592348 $"
__version__ = "$Revision: 619905 $"
__author__ = "Sebastien Binet <binet@cern.ch>, " \
"Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>"
......@@ -27,6 +27,10 @@ if __name__ == "__main__":
"--file",
dest = "fileName",
help = "The path to the POOL file to analyze" )
p( "-c",
"--csv",
dest = "csvFileName",
help = "Output CSV file name, to use with spreadsheets" )
( options, args ) = parser.parse_args()
fileNames = []
......@@ -46,6 +50,12 @@ if __name__ == "__main__":
fileNames = set( fileNames )
# Check the consistency with the CSV output:
if len( fileNames ) > 1 and options.csvFileName:
print( "WARNING CSV output is only available when processing a single "
"input file" )
pass
# Loop over the specified file(s):
for fileName in fileNames:
......@@ -159,6 +169,27 @@ if __name__ == "__main__":
( memSize, diskSize, "Total" ) )
print( "=" * 80 )
# Write out a CSV file if one was requested:
if options.csvFileName and ( len( fileNames ) == 1 ):
# Open the output file:
import csv
with open( options.csvFileName, "wb" ) as f:
writer = csv.writer( f )
# Set up the formatting of the file:
writer.writerow( [ "Name (Type)", "Size/Evt" ] )
# Write all entries to it:
for d in orderedData:
# Skip metadata items:
if d.nEntries != poolFile.dataHeader.nEntries: continue
# Construct the name of the entry:
nameType = "%s (%s)" % \
( d.name, ttree.GetBranch( d.name ).GetClassName() )
# Write the entry:
writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
pass
pass
pass
if len(fileNames) > 1:
print ""
pass # loop over fileNames
......
#! /usr/bin/env python
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
## Simple wrapper to invoke AthFileLite metadata grabber and
# produce AthFile-like text output
#
# $Id: dumpAthfilelite.py 618684 2014-09-26 11:46:14Z graemes $
import argparse
import os
import pprint
import sys
import PyUtils.AthFileLite as AthFileLite
def main():
parser = argparse.ArgumentParser(description="Use AthFileLite interface to retrieve file metadata. "
"Note that the '--type' argument is mandatory as AthFileLite will "
"not even try to guess the type of file.")
parser.add_argument('--type', '-t', metavar='FILETYPE',
help="Specifie filetype: POOL, BS or TAG", required=True,
choices=("POOL", "BS", "TAG"))
parser.add_argument('input_files', nargs="+", help="Input files")
args = vars(parser.parse_args(sys.argv[1:]))
for filename in args['input_files']:
if args["type"] == "POOL":
afl = AthFileLite.AthPoolFile(filename)
elif args["type"] == "BS":
afl = AthFileLite.AthBSFile(filename)
elif args["type"] == "TAG":
afl = AthFileLite.AthTagFile(filename)
metadata = afl.fileinfo
print "="*80
print filename
print "="*80
pprint.pprint(metadata)
print "="*80
if __name__ == "__main__":
main()
......@@ -18,6 +18,7 @@ alias checkxAOD checkxAOD.py
alias diffPoolFiles diffPoolFiles.py
alias merge-poolfiles merge-poolfiles.py
alias checkTag checkTag.py
alias checkMetaSG checkMetaSG.py
alias setupWorkArea setupWorkArea.py
alias pyroot pyroot.py
alias print_auditor_callgraph print_auditor_callgraph.py
......@@ -53,6 +54,7 @@ apply_pattern declare_scripts files="\
checkFile.py \
checkPlugins.py \
checkSG.py \
checkMetaSG.py \
checkTP.py \
checkTag.py \
checkxAOD.py \
......@@ -65,7 +67,8 @@ apply_pattern declare_scripts files="\
dlldep.py \
dso-stats.py \
dump-athfile.py \
filter-and-merge-d3pd.py \
dumpAthfilelite.py \
filter-and-merge-d3pd.py \
gen-typereg-dso.py \
gen_klass.py \
get-tag-diff.py \
......
......@@ -7,7 +7,7 @@
from __future__ import with_statement
__version__ = "$Revision: 588873 $"
__version__ = "$Revision: 621178 $"
__author__ = "Sebastien Binet"
__doc__ = "implementation of AthFile-server behind a set of proxies to isolate environments"
......@@ -393,8 +393,10 @@ class AthFileServer(object):
def _root_open(self, fname):
import PyUtils.Helpers as H
# speed-up by tampering LD_LIBRARY_PATH to not load reflex-dicts
import re
with H.restricted_ldenviron(projects=['AtlasCore']):
import re, os
restrictedProjects = ['AtlasCore']
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
with H.ShutUp(filters=[
re.compile(
'TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
......@@ -1023,7 +1025,10 @@ class FilePeeker(object):
def _root_open(self, fname, raw=False):
import PyUtils.Helpers as H
with H.restricted_ldenviron(projects=['AtlasCore']):
restrictedProjects = ['AtlasCore']
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
root = self.pyroot
import re
with H.ShutUp(filters=[
......@@ -1061,7 +1066,10 @@ class FilePeeker(object):
runs=[]
evts=[]
import PyUtils.Helpers as H
with H.restricted_ldenviron(projects=['AtlasCore']):
restrictedProjects = ['AtlasCore']
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
root = self.pyroot
do_close = True
if isinstance(fname, basestring):
......@@ -1119,7 +1127,10 @@ class FilePeeker(object):
def _is_empty_pool_file(self, fname):
is_empty = False
import PyUtils.Helpers as H
with H.restricted_ldenviron(projects=['AtlasCore']):
restrictedProjects = ['AtlasCore']
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): restrictedProjects=[] #special case for athanalysisbase
with H.restricted_ldenviron(projects=restrictedProjects):
root = self.pyroot
do_close = True
if isinstance(fname, basestring):
......@@ -1140,6 +1151,8 @@ class FilePeeker(object):
return is_empty
def _process_call(self, fname, evtmax, projects=['AtlasCore']):
import os
if(os.environ.get("AtlasProject",None)=="AthAnalysisBase"): projects=[] #special case for athanalysisbase
msg = self.msg()
import PyUtils.Helpers as H
f = _create_file_infos()
......
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
# Lightweight and simplified version of AthFile
# As the transform knows which files are bytestream and which are
# POOL files we just have two simple classes and definitely avoid
# doing anything fancy here
import os
import os.path
import re
import subprocess
import sys
import uuid
import PyUtils.dbsqlite as dbsqlite
from PyCmt.Logging import msg, logging
def _create_file_info_template():
"""simple helper function to create consistent dicts for the
fileinfos attribute of AthFile
"""
d = {
'file_md5sum': None,
'file_name': None,
'file_size': None,
'file_type': None,
'file_guid': None,
'nentries' : 0, # to handle empty files
'run_number': [],
'run_type': [],
'evt_type': [],
'evt_number': [],
'lumi_block': [],
'beam_energy': [],
'beam_type': [],
'stream_tags': [],
'metadata_items': None,
'eventdata_items': None,
'stream_names': None,
'geometry': None,
'conditions_tag': None,
'det_descr_tags': None,
##
'metadata': None,
'tag_info': None,
}
return d
def _urlType(filename):
if filename.startswith('dcap:'):
return 'dcap'
if filename.startswith('root:'):
return 'root'
if filename.startswith('rfio:'):
return 'rfio'
if filename.startswith('file:'):
return 'posix'
return 'posix'
def _get_file_size(filename):
    """Return the size of *filename* in bytes, or None if it cannot be read.

    Plain (posix) paths are stat'ed directly; protocol-accessed files
    (root:, dcap:, rfio:) are opened as raw TFiles through ROOT.
    """
    if _urlType(filename) == 'posix':
        try:
            fsize = os.stat(filename).st_size
        except OSError:
            # BUGFIX: os.stat raises OSError, not IOError, so a missing
            # file used to escape this handler and crash the caller.
            fsize = None
    else:
        from PyUtils.RootUtils import import_root
        root = import_root()
        fsize = None
        try:
            msg.debug('Calling TFile.Open for {0}'.format(filename))
            rfile = root.TFile.Open(filename + '?filetype=raw', 'READ')
            fsize = rfile.GetSize()
            msg.debug('Got size {0} from TFile.GetSize'.format(fsize))
            # BUGFIX: Close() must stay inside the try -- a failed Open
            # yields a null proxy whose Close() itself raises
            # ReferenceError, which previously escaped uncaught.
            rfile.Close()
        except ReferenceError:
            msg.error('Failed to get size of {0}'.format(filename))
            fsize = None
        del root
    return fsize
class AthPoolFile(object):
    """Extract metadata from a POOL file by running a minimal one-event
    athena 'peeker' job and reading back the sqlite info file it writes."""

    def __init__(self, filename):
        self._filename = filename
        # A unique stub keeps the temporary jobOptions/db/log file names
        # from colliding when several files are processed in one directory.
        if self._filename:
            self._stub = os.path.basename(self._filename) + '-' + str(uuid.uuid4())
        else:
            self._stub = str(uuid.uuid4())
        self._jobOptionsFile = self._stub + '-miniAthFile.py'
        self._infoOutputFile = self._stub + '-miniAthFile.db'
        self._logFile = self._stub + '-miniAthFile.log'
        self._metadata = _create_file_info_template()
        self.fopen()

    def fopen(self):
        """Run the peeker job and load the metadata it produced."""
        self._writeMiniJobOptions()
        self._runMiniAthena()
        self._loadFileInfo()
        self._metadata['file_type'] = 'pool'
        self._metadata['file_size'] = _get_file_size(self._filename)

    @property
    def fileinfo(self):
        # Dictionary of metadata extracted from the file.
        return self._metadata

    def _writeMiniJobOptions(self):
        """Write the jobOptions that drive the one-event peeker job."""
        try:
            # BUGFIX: use a context manager so the jobOptions file is
            # closed (and flushed) before athena is launched.
            with open(self._jobOptionsFile, "w") as jo:
                # NOTE(review): the 'filename' format argument below is never
                # referenced by the template string -- the FNAME line looks
                # as if it should contain a '{filename}' placeholder; confirm
                # against the upstream source.
                print >>jo, os.linesep.join(("FNAME=['(unknown)']",
                                             "import os",
                                             "os.environ['ATHENA_PROC_NUMBER'] = '0'",
                                             "os.environ.pop('PYTHONINSPECT', None)",
                                             "include('AthenaPython/athfile_peeker.py')",
                                             "from AthenaCommon.AlgSequence import AlgSequence",
                                             "job = AlgSequence()",
                                             "job.peeker.outfname='{picklename}'",
                                             "job.peeker.infname=FNAME[0]",
                                             "import IOVDbSvc.IOVDb",
                                             "theApp.EvtMax = 1")).format(filename=self._filename, picklename=self._infoOutputFile)
        except Exception as e:
            print >>sys.stderr, "Exception raised when writing JO file: {0}".format(e)
            raise

    def _runMiniAthena(self):
        """Run athena on the mini jobOptions, capturing output in the log file."""
        # BUGFIX: close the log file handle when the subprocess finishes.
        with open(self._logFile, 'wb') as out:
            subprocess.check_call(['athena.py', self._jobOptionsFile], stdout=out, stderr=out)

    def _loadFileInfo(self):
        """Load the metadata dict the peeker job stored in the sqlite file."""
        db = dbsqlite.open(self._infoOutputFile)
        self._metadata = db['fileinfos']

    def _getSize(self):
        # FIXME Probably need to use ROOT for non-posix fs
        try:
            self._metadata['file_size'] = os.stat(self._filename)[6]
        except OSError:
            # BUGFIX: os.stat raises OSError, not IOError.
            self._metadata['file_size'] = None

    def __del__(self):
        # Remove the temporary files unless AFDEBUG asks to keep them.
        if 'AFDEBUG' not in os.environ:
            for tmpfile in (self._jobOptionsFile, self._infoOutputFile, self._logFile):
                try:
                    os.unlink(tmpfile)
                except (OSError, IOError):
                    # BUGFIX: 'except OSError, IOError' bound the exception
                    # to the *name* IOError instead of catching IOError.
                    pass
class AthBSFile(object):
def __init__(self, filename):
self._filename = filename
self._metadata = _create_file_info_template()
self.fopen()
def fopen(self):
self._process_bs_file(self._filename)
self._metadata['file_type'] = 'bs'
self._metadata['file_size'] = _get_file_size(self._filename)
@property
def fileinfo(self):
return self._metadata
def _process_bs_file (self, fname, evtmax=1, full_details=True):
import eformat as ef
data_reader = ef.EventStorage.pickDataReader(fname)
assert data_reader, \
'problem picking a data reader for file [%s]'%fname
beam_type = '<beam-type N/A>'
try:
beam_type = data_reader.beamType()
except Exception,err:
msg.warning ("problem while extracting beam-type information")
beam_energy = '<beam-energy N/A>'
try:
beam_energy = data_reader.beamEnergy()
except Exception,err:
msg.warning ("problem while extracting beam-type information")
bs = ef.istream(fname)
self._metadata['nentries'] = bs.total_events
bs_metadata = {}
for md in data_reader.freeMetaDataStrings():
if md.startswith('Event type:'):
k = 'evt_type'
v = []
if 'is sim' in md: v.append('IS_SIMULATION')
else: v.append('IS_DATA')
if 'is atlas' in md: v.append('IS_ATLAS')
else: v.append('IS_TESTBEAM')
if 'is physics' in md: v.append('IS_PHYSICS')
else: v.append('IS_CALIBRATION')
bs_metadata[k] = tuple(v)
elif md.startswith('GeoAtlas:'):
k = 'geometry'
v = md.split('GeoAtlas:')[1].strip()
bs_metadata[k] = v
elif md.startswith('IOVDbGlobalTag:'):
k = 'conditions_tag'
v = md.split('IOVDbGlobalTag:')[1].strip()
bs_metadata[k] = v
elif '=' in md:
k,v = md.split('=')
bs_metadata[k] = v
# for bwd/fwd compat...
# see: https://savannah.cern.ch/bugs/?73208
# needed for very old BS
for key_name,fn_name in (
('GUID','GUID'),
('Stream','stream'),
('Project', 'projectTag'),
('LumiBlock', 'lumiblockNumber'),
('run_number', 'runNumber'),
):
if key_name in bs_metadata:
# no need: already in bs metadata dict
continue
if hasattr(data_reader, fn_name):
bs_metadata[key_name] = getattr(data_reader, fn_name)()
self._metadata['file_guid'] = bs_metadata.get('GUID', None)
self._metadata['evt_type'] = bs_metadata.get('evt_type', [])
self._metadata['geometry'] = bs_metadata.get('geometry', None)
self._metadata['conditions_tag'] = bs_metadata.get('conditions_tag', None)
self._metadata['bs_metadata'] = bs_metadata
if not data_reader.good():
# event-less file...
self._metadata['run_number'].append(bs_metadata.get('run_number', 0))
self._metadata['lumi_block'].append(bs_metadata.get('LumiBlock', 0))
return
if evtmax == -1:
evtmax = nentries
ievt = iter(bs)
for i in xrange(evtmax):
try:
evt = ievt.next()
evt.check() # may raise a RuntimeError
stream_tags = [dict(stream_type=tag.type,
stream_name=tag.name,
obeys_lbk=bool(tag.obeys_lumiblock))
for tag in evt.stream_tag()]
self._metadata['run_number'].append(evt.run_no())
self._metadata['evt_number'].append(evt.global_id())
self._metadata['lumi_block'].append(evt.lumi_block())
self._metadata['run_type'].append(ef.helper.run_type2string(evt.run_type()))
self._metadata['beam_type'].append(beam_type)
self._metadata['beam_energy'].append(beam_energy)
self._metadata['stream_tags'].extend(stream_tags)
except RuntimeError, err:
print "** WARNING ** detected a corrupted bs-file:\n",err
# Metadata extractor for TAG files; fills the common metadata template
# by scanning the TAG file directly (see _process_tag_file below).
class AthTagFile(object):
# Store the filename, start from the shared metadata template and
# immediately process the file.
def __init__(self, filename):
self._filename = filename
self._metadata = _create_file_info_template()
self.fopen()
# Process the TAG file and record file type and size in the metadata.
def fopen(self):
self._process_tag_file()
self._metadata['file_type'] = 'tag'
self._metadata['file_size'] = _get_file_size(self._filename)
# Dictionary of metadata extracted from the file.
@property
def fileinfo(self):
return self._metadata
def _process_tag_file(self, evtmax=1):
tag_ref= None
tag_guid=None
nentries = 0
runs=[]