Skip to content
Snippets Groups Projects
Commit 43e6fe51 authored by danciubotaru's avatar danciubotaru
Browse files

added the inital files required for this project

parent ed383013
No related branches found
No related tags found
1 merge request!20779WIP: Migrate DataQualityTools to ToolHandles
#!/usr/bin/env python
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
# This script reads metadata from a given file
import sys
import json
import argparse
import time
import logging
msg = logging.getLogger('MetaReader')
from PyUtils.MetaReader import read_metadata
def __pretty_print(content, indent=4, fd=sys.stdout, level=0):
    """Write a nested metadata structure to ``fd`` as indented plain text.

    Dictionaries print each key on its own line and recurse into the value one
    level deeper; lists and tuples recurse into each element one level deeper;
    every other value is written with ``str()``.

    :param content: the value to print (dict, list, tuple or any leaf value).
    :param indent: number of spaces used per nesting level.
    :param fd: writable text stream; defaults to the ``sys.stdout`` object that
        was current when this function was defined.
    :param level: current nesting depth; top-level callers leave it at 0.
    """
    if isinstance(content, dict):
        for key, value in content.items():
            # fd.write() instead of the Python 2 only "print >> fd" statement,
            # so the helper runs unchanged on Python 2 and Python 3.
            fd.write(' ' * indent * level + str(key) + '\n')
            __pretty_print(value, indent, fd, level + 1)
    elif isinstance(content, (list, tuple)):
        # Lists and tuples are rendered identically.
        for value in content:
            __pretty_print(value, indent, fd, level + 1)
    else:
        # Leaf values sit one level deeper than the key that owns them.
        fd.write(' ' * indent * (level + 1) + str(content) + '\n')
def __main():
    """Command-line entry point of the metadata reader.

    Parses the command-line arguments, reads the metadata of the requested
    files through ``read_metadata`` and writes the result either to stdout or
    to the file given with ``-o``. With ``--json`` the result is serialised
    with ``json.dumps``; otherwise it is rendered by ``__pretty_print``.
    """
    parser = argparse.ArgumentParser(description='This script reads metadata from a given file')
    parser.add_argument('filenames',
                        nargs='+',
                        help='The filenames to read. User can provide a single file or a list of files.')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='print detailed output on screen')
    parser.add_argument('-o',
                        '--output',
                        metavar='FILE',
                        default=None,
                        help="Saves the output in a file. By default, the output is written on the screen (stdout) in a prettier format for better readability.")
    parser.add_argument('--json',
                        action='store_true',
                        help="Sets the output file format as json.")
    parser.add_argument('--indent',
                        metavar='N',
                        type=int,
                        default=2,
                        help="Sets the indent spaces in the output either on screen (without -o flag) either on file (with -o flag). By default, uses two spaces as indent.")
    parser.add_argument('-f',
                        '--full',
                        action='store_true',
                        default=False,
                        help="Retrieve the full set of metadata from file.")
    parser.add_argument('-t',
                        '--type',
                        default=None,
                        metavar='TYPE',
                        type=str,
                        choices=['POOL', 'BS'],
                        help="The file type of the input filename. By default, it tries to determine itself the file type of the input.")
    args = parser.parse_args()

    if args.verbose:
        # Route INFO messages of the 'MetaReader' logger to the console.
        msg.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        handler.setLevel(logging.INFO)
        handler.setFormatter(logging.Formatter('%(name)s %(levelname)s %(message)s'))
        msg.addHandler(handler)

    msg.info('The output file is: {0}'.format(args.output))

    # Time the actual read; the previous timer was started on the line right
    # before it was reported and therefore always measured ~0 ms.
    start_time = time.time()
    metadata = read_metadata(args.filenames, args.type, full=args.full)
    msg.info('Read metadata in: {0} milliseconds'.format((time.time() - start_time) * 1e3))

    def emit(fd):
        # Single place that knows both output formats; uses write() rather
        # than the Python 2 only print statement.
        if args.json:
            fd.write(json.dumps(metadata, indent=args.indent) + '\n')
        else:
            __pretty_print(metadata, indent=args.indent, fd=fd)

    if args.output is None:
        emit(sys.stdout)
    else:
        with open(args.output, 'w') as fd:
            emit(fd)

    msg.info('Done!')


if __name__ == '__main__':
    __main()
# MetaReader
This utility extracts metadata from a given ATLAS file.
MetaReader can be run as a "standalone" file or as a module (library).
(last updated on 6.02.2018)
---
## 1. Standalone version
### How to use:
To view all available arguments run:
```bash
python meta-reader.py -h
```
to run it:
```bash
python meta-reader.py filename1 filename2
```
_The user can give as input a file or a list of files._
### Arguments
* #### -h, --help
_Displays the help information._
Example:
```bash
python meta-reader.py -h
```
* #### -v, --verbose
_Displays some extra information on the screen._
Example:
```bash
python meta-reader.py AOD.05352803._000417.pool.root.1 -v
```
* #### -o, --output
_The user can specify the name of the output filename. By default, the output is written on the screen (stdout) in a prettier format for better readability._
Example:
```bash
python meta-reader.py AOD.05352803._000417.pool.root.1 -o outFileName.txt
```
* #### --json
_Sets the output file format as `json`._
Example:
```bash
python meta-reader.py AOD.05352803._000417.pool.root.1 -o json.txt --json
```
* #### --indent
_Sets the number of indent spaces used in the output, either on screen (without the `-o` flag) or in the file (with the `-o` flag). By default, it uses two spaces as indent._
Example:
```bash
python meta-reader.py AOD.05352803._000417.pool.root.1 -o json.txt --json --indent 4
```
* #### -f, --full
_Retrieve the full set of metadata from a file. By default, this is set to False and only the following keys are returned for a given file: 'file_guid', 'file_size', 'file_type', 'nentries'._
Example:
```bash
python meta-reader.py AOD.05352803._000417.pool.root.1 -f
```
Output:
``` python
{'AOD.05352803._000417.pool.root.1':
{
'file_guid': '406B5C0F-5386-E511-A367-02163E00E173',
'file_size': 1173306136,
'file_type': 'POOL',
'nentries': 1260
}
}
```
* #### -t, --type
_The file type of the input filename. It is optional because `file_type` is determined automatically. If the user wants to specify the `file_type`, there are two options: POOL or BS (BS stands for bytestream files: RAW or DRAW)._
Example:
```bash
python meta-reader.py AOD.05352803._000417.pool.root.1 -t POOL
```
---
## 2. Module Version
### How to use:
Include in your file the following statement:
```python
from PyUtils.MetaReader import read_metadata
```
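The `PyUtils` package that contains `MetaReader.py` must be importable (its parent directory must be on the Python path) to be used in this way.
If `MetaReader.py` is located in another directory, add the directory that contains the `PyUtils` package to the system path before importing:
```python
import sys
sys.path.append('/path/to/directory/containing/PyUtils')
```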
### Parameters
```python
def read_metadata(filenames, file_type=None, full=False):
```
* #### filenames
_The name of the input file(s). The user can provide a single file or a list of files._
* #### file_type
_The file type of the input file(s). The user has two options: POOL or BS (BS stands for bytestream files: RAW or DRAW). If it is not given, the file type is determined automatically._
* #### full
_By default `full=False`, which returns a "lite" version of the metadata with only these keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
If `full=True`, this will return all metadata associated with the filename._
Example:
```python
read_metadata('AOD.05352803._000417.pool.root.1', file_type=None, full=False)
```
---
\ No newline at end of file
import os
import logging
msg = logging.getLogger(__name__)
import re
import ast
import sys
def _detect_file_type(filename):
    """Return 'POOL' if the file starts with the bytes ``root``, else 'BS'."""
    with open(filename, 'rb') as binary_file:
        magic = binary_file.read(4)
    # Compare against a bytes literal: the file is opened in binary mode, so
    # on Python 3 a comparison with the str 'root' would never match.
    return 'POOL' if magic == b'root' else 'BS'


def _parse_bs_free_metadata(metadata_strings):
    """Convert the free-form metadata strings of a bytestream file to a dict."""
    parsed = {}
    for md in metadata_strings:
        if md.startswith('Event type:'):
            parsed['evt_type'] = (
                'IS_SIMULATION' if 'is sim' in md else 'IS_DATA',
                'IS_ATLAS' if 'is atlas' in md else 'IS_TESTBEAM',
                'IS_PHYSICS' if 'is physics' in md else 'IS_CALIBRATION',
            )
        elif md.startswith('GeoAtlas:'):
            parsed['geometry'] = md.split('GeoAtlas:')[1].strip()
        elif md.startswith('IOVDbGlobalTag:'):
            parsed['conditions_tag'] = md.split('IOVDbGlobalTag:')[1].strip()
        elif '=' in md:
            # Split on the first '=' only so values containing '=' do not
            # break the two-name unpacking.
            key, value = md.split('=', 1)
            parsed[key] = value
    return parsed


def read_metadata(filenames, file_type=None, full=False):
    """
    This tool is independent of Athena framework and returns the metadata from a given file.
    :param filenames: the input file, or a list of input files, from which metadata needs to be extracted.
    :param file_type: the type of file, 'POOL' or 'BS' (bytestream: RAW, DRAW). When not given, the type of each
                      file is detected from its first four bytes.
    :param full: if true, will return all metadata associated with the filename. By default, is false and this will
                 return a "lite" version which have only the following keys: 'file_guid', 'file_size', 'file_type',
                 'nentries'.
    :return: a dictionary keyed by filename holding the metadata of each input file, or None (after logging an
             error) as soon as a file with an unsupported type is met.
    """
    # The converter helpers of this module read ROOT and clidgen as module
    # globals, so they must be bound at module level, not as locals here.
    global ROOT, eformat, clidgen

    # Accept a single filename as well as a list of filenames.
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3
    if isinstance(filenames, string_types):
        filenames = [filenames]

    # create the storage object for metadata.
    metaDict = {}

    for filename in filenames:
        # Honour an explicit file_type; previously an explicit type left the
        # current type unset and every call ended in the "unknown" branch.
        current_file_type = file_type if file_type else _detect_file_type(filename)

        metaDict[filename] = {
            'file_size': os.path.getsize(filename),
        }

        # ------------------------------------------------------------------ #
        # retrieves metadata from POOL files.
        if current_file_type == 'POOL':
            # Import unconditionally: a repeated import is cheap, and guarding
            # on sys.modules could leave this module's global name unbound
            # when another module had already imported ROOT.
            import ROOT
            from CLIDComps.clidGenerator import clidGenerator
            clidgen = clidGenerator(db=None)

            evt = ROOT.POOL.TEvent()
            evt.readFrom(filename)
            evt.getEntry(0)

            # add the missing keys from the basis "lite" metadata dictionary
            # (no trailing comma: it used to turn the guid into a 1-tuple)
            metaDict[filename]['file_guid'] = __read_guid(filename)
            metaDict[filename]['file_type'] = 'POOL'
            metaDict[filename]['nentries'] = evt.getEntries()

            # if the flag full is set to true then grab all metadata
            if full:
                # NOTE(review): these entries are stored at the top level of
                # the result, next to the filename keys, so with several input
                # files later files overwrite earlier ones. Kept as-is to
                # preserve the returned shape - confirm whether they should
                # live under metaDict[filename].
                metaDict[';00;MetaDataSvc'] = __convert_DataHeader(evt.retrieveMetaInput('DataHeader', ';00;MetaDataSvc'))

                for name, cls in metaDict[';00;MetaDataSvc']:
                    try:
                        a = evt.retrieveMetaInput(cls, name)
                    except LookupError:
                        continue

                    if cls == 'IOVMetaDataContainer':
                        metaDict[name] = __convert_IOVMetaDataContainer(a)
                    elif cls == 'xAOD::EventFormat':
                        metaDict[name] = __convert_EventFormat(a)
                    elif cls == 'EventStreamInfo':
                        metaDict[name] = __convert_EventStreamInfo(a)

        # ------------------------------------------------------------------ #
        # retrieves metadata from bytestream (BS) files (RAW, DRAW)
        elif current_file_type == 'BS':
            import eformat

            # store the file_type of the input filename
            # NOTE(review): lower-case 'bs' while POOL files report 'POOL';
            # kept unchanged because callers may compare against it.
            metaDict[filename]['file_type'] = 'bs'

            # store the number of entries
            bs = eformat.istream(filename)
            metaDict[filename]['nentries'] = bs.total_events

            # store the 'guid' value
            data_reader = eformat.EventStorage.pickDataReader(filename)
            assert data_reader, 'problem picking a data reader for file [%s]' % filename

            if hasattr(data_reader, 'GUID'):
                metaDict[filename]['file_guid'] = getattr(data_reader, 'GUID')()

            # if the flag full is set to true then grab all metadata
            if full:
                bs_metadata = _parse_bs_free_metadata(data_reader.freeMetaDataStrings())

                # Fill in values the free-form strings did not provide from
                # the data reader accessors, when those exist.
                for key_name, fn_name in (
                        ('Stream', 'stream'),
                        ('Project', 'projectTag'),
                        ('LumiBlock', 'lumiblockNumber'),
                        ('run_number', 'runNumber'),
                ):
                    if key_name in bs_metadata:
                        # no need: already in bs metadata dict
                        continue
                    if hasattr(data_reader, fn_name):
                        bs_metadata[key_name] = getattr(data_reader, fn_name)()

                metaDict[filename]['evt_type'] = bs_metadata.get('evt_type', [])
                metaDict[filename]['geometry'] = bs_metadata.get('geometry', None)
                metaDict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
                metaDict[filename].update(bs_metadata)

                if not data_reader.good():
                    # event-less file: the previous code called .append() on
                    # 'run_number' / 'lumi_block', keys that are either absent
                    # or not lists at this point, so it always raised. Store
                    # one-element lists, which is what append() was aiming at.
                    metaDict[filename]['run_number'] = [bs_metadata.get('run_number', 0)]
                    metaDict[filename]['lumi_block'] = [bs_metadata.get('LumiBlock', 0)]

        # ------------------------------------------------------------------ #
        # Throw an error if the user provides other file types
        else:
            msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
            return None

    return metaDict
# Methods for POOL
def __convert_EventStreamInfo(esi):
    """Convert an EventStreamInfo-like object into a plain dictionary.

    The result holds the run numbers, processing tags and lumi block numbers
    as lists, one dictionary per event type and the item list as
    ``(class name, key)`` tuples. Class names are resolved through the
    module-level ``clidgen`` object, which must exist before this is called.

    :param esi: object providing getRunNumbers(), getProcessingTags(),
        getLumiBlockNumbers(), getEventTypes() and getItemList().
    :return: dictionary with the keys 'run_number', 'processing_tags',
        'lumi_blocks', 'event_types' and 'ItemList'.
    """
    run_numbers = list(esi.getRunNumbers())
    processing_tags = list(esi.getProcessingTags())
    lumi_blocks = list(esi.getLumiBlockNumbers())

    # NOTE(review): the IS_* values are read as plain attributes, not called -
    # confirm they carry per-event flags rather than class-level constants.
    event_types = [
        {
            'IS_CALIBRATION': event_type.IS_CALIBRATION,
            'IS_SIMULATION': event_type.IS_SIMULATION,
            'IS_TESTBEAM': event_type.IS_TESTBEAM,
            'mc_channel_number': event_type.mc_channel_number(),
            'mc_event_number': event_type.mc_event_number(),
            'detdescr_tags': event_type.get_detdescr_tags(),
        }
        for event_type in esi.getEventTypes()
    ]

    item_list = [
        (clidgen.getNameFromClid(item.first), item.second)
        for item in esi.getItemList()
    ]

    return {
        'run_number': run_numbers,
        'processing_tags': processing_tags,
        'lumi_blocks': lumi_blocks,
        'event_types': event_types,
        'ItemList': item_list,
    }
def __convert_IOVMetaDataContainer(iovmetadata):
    """Convert every payload of an IOV metadata container.

    :param iovmetadata: object whose payloadContainer() yields the payloads.
    :return: list with one ``__convert_CondAttributeList`` result per payload.
    """
    converted = []
    for payload in iovmetadata.payloadContainer():
        converted.append(__convert_CondAttributeList(payload))
    return converted
def __convert_CondAttributeList(condattrlist):
    """Convert a conditions attribute list into a list of dictionaries.

    One dictionary is produced per channel index. It contains the channel's
    attributes, plus a 'chanName' entry when the channel name is not empty.
    Values that parse as Python literals are replaced by the parsed value;
    all other values are kept unchanged.

    :param condattrlist: object providing size(), chanNum(i),
        attributeList(chanNum) and chanName(chanNum).
    :return: list of attribute dictionaries, in channel index order.
    """
    r = []
    for i in range(condattrlist.size()):
        chanNum = condattrlist.chanNum(i)
        d = dict(condattrlist.attributeList(chanNum))

        chanName = condattrlist.chanName(chanNum)
        # Compare by value: "is not ''" tests object identity and only works
        # by accident of string interning.
        if chanName != '':
            d['chanName'] = chanName

        # Iterate over a snapshot (items() works on Python 2 and 3) because
        # the dictionary is updated inside the loop.
        for k, v in list(d.items()):
            try:  # to cast the strings into relevant Python types/data structures
                d[k] = ast.literal_eval(v)
            except (ValueError, SyntaxError, TypeError):
                # Not a Python literal: deliberately keep the raw value.
                pass

        r.append(d)
    return r
def __convert_EventFormat(evfmt):
    """Convert an event format object into a list of tuples.

    :param evfmt: iterable of entries exposing ``first`` and ``second``, where
        ``second`` provides className().
    :return: list of ``(entry.first, entry.second.className())`` tuples.
    """
    pairs = []
    for entry in evfmt:
        pairs.append((entry.first, entry.second.className()))
    return pairs
def __convert_DataHeader(dataHeader):
    """Convert a data header into a list of ``(key, class name)`` tuples.

    Class names are resolved through the module-level ``clidgen`` object,
    which must exist before this is called.

    :param dataHeader: iterable of elements providing getKey() and
        getPrimaryClassID().
    :return: list of ``(element key, class name)`` tuples, in iteration order.
    """
    entries = []
    for element in dataHeader:
        class_name = clidgen.getNameFromClid(element.getPrimaryClassID())
        entries.append((element.getKey(), class_name))
    return entries
# Currently not used
# def __md5(fname, block_size=2**20, do_fast_md5=True):
# import hashlib
# # do_fast_md5 is for compatibility with AthFile.
# # -- Sebastian Liem
# hash_md5 = hashlib.md5()
# with open(fname, 'rb') as f:
# for chunk in iter(lambda: f.read(block_size), b''):
# hash_md5.update(chunk)
# if do_fast_md5:
# break
# return hash_md5.hexdigest()
def __read_guid(filename):
    """
    Extracts the "guid" (Globally Unique Identifier in POOL files and Grid catalogs) value from a POOL file.

    The '##Params' object of the file is scanned entry by entry; each entry's ``db_string`` is matched against
    the pattern ``[NAME=<name>][VALUE=<value>]`` and the value of the first entry named 'FID' is returned.
    Relies on the module-level ``ROOT`` name being bound before the call.

    :param filename: the input file
    :return: the guid value, or None when the file has no '##Params' object or no 'FID' entry
    """
    root_file = ROOT.TFile(filename)
    try:
        params = root_file.Get('##Params')
        # Guard against a missing '##Params' object instead of failing on
        # the GetEntries() call below.
        if not params:
            return None

        regex = re.compile(r'^\[NAME\=([a-zA-Z0-9\_]+)\]\[VALUE\=(.*)\]')

        for i in range(params.GetEntries()):
            params.GetEntry(i)
            result = regex.match(params.db_string)
            if result and result.group(1) == 'FID':
                return result.group(2)

        return None
    finally:
        # Always release the file handle; it used to be left open.
        root_file.Close()
# Methods for BS
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment