#!/usr/bin/env python

# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
# Extract meta-data from xAODs -- without the help of Athena.
#
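# Typical invocations (illustrative; the file names are placeholders):
#
#    AodMetaDataReader.py AOD.pool.root
#    AodMetaDataReader.py --format json --output meta.json AOD1.pool.root AOD2.pool.root
#    AodMetaDataReader.py --files-from input_list.txt --pickle
#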

import argparse
import array
import ast
import json
import numbers
import os
import pickle
import re
import shutil
import sys
import tempfile


if __name__ != '__main__':
   # We can't risk loading our dictionaries into your PyROOT process. So please, use the subprocess
   # module and the pickle interface. (If someone asks, we might implement a helper function.)
   raise ImportError('AodMetaDataReader cannot be loaded as a module')
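
# A minimal sketch of the calling pattern suggested above (the input file name
# is a placeholder; this snippet is illustrative, not part of the tool):
#
#    import pickle, subprocess
#    proc = subprocess.Popen(['AodMetaDataReader.py', '--pickle', 'AOD.pool.root'],
#                            stdout=subprocess.PIPE)
#    metaData = pickle.load(proc.stdout)
#    proc.wait()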


# supported types of meta-data values (storage type, member name in IOVPayloadContainer, display type)
payloadItemDescs = [
   None,
   ('bool', 'm_bool', bool),
   ('char', 'm_char', int),
   ('unsigned char', 'm_unsignedChar', int),
   ('short', 'm_short', int),
   ('unsigned short', 'm_unsignedShort', int),
   ('int', 'm_int', int),
   ('unsigned int', 'm_unsignedInt', int),
   ('long', 'm_long', int),
   ('unsigned long', 'm_unsignedLong', int),
   ('Long64_t', 'm_longLong', int),
   ('ULong64_t', 'm_unsignedLongLong', int),
   ('float', 'm_float', float),
   ('double', 'm_double', float),
   ('std::string', 'm_string', str),
   ('ULong64_t', 'm_date', int),
   ('ULong64_t', 'm_timeStamp', int),
]
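# Worked example (illustration only): an AttrListIndexes entry whose m_typeIndex
# is 0x1c6 decodes to typeIndex = 0x1c6 & 0x3f = 6 (an 'int', stored in m_int)
# and nameIndex = 0x1c6 >> 6 = 7 (its key is m_attrName[7]); getObjIndex() then
# locates the value within m_int.
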
# whether the dictionary has been loaded
loadedDictionary = False


# create minimal header for reading persistent objects representing xAOD meta-data
def writeDictionaryHeader(f):
   f.write('''
#include <string>
#include <vector>
#include <Rtypes.h>
''')
   f.write('''
class AttrListIndexes {
public:
   int getTypeIndex() const { return (m_typeIndex & 0x3f); }
   int getNameIndex() const { return (m_typeIndex >> 6); }
   int getObjIndex() const { return m_objIndex; }
private:
   unsigned short m_typeIndex;
   unsigned short m_objIndex;
};
''')
   f.write('struct IOVPayloadContainer_p1 {\n')
   f.write('   std::vector<AttrListIndexes> m_attrIndexes;\n')
   for payloadItemDesc in payloadItemDescs:
      if not payloadItemDesc:
         continue
      f.write('   std::vector<%s> %s;\n' % payloadItemDesc[:2])
   f.write('   std::vector<std::string> m_attrName;\n')
   f.write('};\n')
   f.write('''
struct IOVMetaDataContainer_p1 {
   std::string m_folderName;
   std::string m_folderDescription;
   IOVPayloadContainer_p1 m_payload;
};
''')
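
# For reference, the header generated above declares (abridged):
#
#    struct IOVPayloadContainer_p1 {
#       std::vector<AttrListIndexes> m_attrIndexes;
#       std::vector<bool> m_bool;
#       ...
#       std::vector<ULong64_t> m_timeStamp;
#       std::vector<std::string> m_attrName;
#    };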


# load classes for reading persistent objects representing xAOD meta-data
def loadDictionary():
   global loadedDictionary
   if loadedDictionary:
      return
   loadedDictionary = True
   tempPath = tempfile.mkdtemp()
   try:
      headerPath = os.path.join(tempPath, 'AodMetaData.h')
      with open(headerPath, 'w') as f:
         writeDictionaryHeader(f)
      # the trailing '+' asks ROOT to compile the generated header with ACLiC
      ROOT.gROOT.LoadMacro('%s+' % headerPath)
   finally:
      shutil.rmtree(tempPath, ignore_errors=True)


def extractMetaData(path):
   folderNames = ('/Generation/Parameters', '/Simulation/Parameters', '/Digitization/Parameters', '/TagInfo')
   loadDictionary()
   f = ROOT.TFile.Open(path)
   if not f:
      raise RuntimeError('Failed to open file: %s' % repr(path))
   metaData = {}
   try:
      metaDataTree = f.MetaData
      metaDataTree.SetBranchStatus('*', False)
      folderNamesFound = []
      for folderName in folderNames:
         found = array.array('I', [0])
         metaDataTree.SetBranchStatus(folderName.replace('/', '_'), True, found)
         if found[0]:
            folderNamesFound.append(folderName)
         else:
            metaData[folderName] = None
      for metaDataRecord in metaDataTree:
         for folderName in folderNamesFound:
            payload = getattr(metaDataRecord, folderName.replace('/', '_')).m_payload
            folderData = {}
            for attrIndex in payload.m_attrIndexes:
               typeIndex, nameIndex, objIndex = attrIndex.getTypeIndex(), attrIndex.getNameIndex(), attrIndex.getObjIndex()
               payloadItemDesc = ( payloadItemDescs[typeIndex] if typeIndex < len(payloadItemDescs) else None )
               key = payload.m_attrName.at(nameIndex)
               # unknown type indices map to the TypeError sentinel
               value = ( payloadItemDesc[2](getattr(payload, payloadItemDesc[1]).at(objIndex)) if payloadItemDesc else TypeError )
               if key not in folderData:
                  folderData[key] = value
               elif folderData[key] != value:
                  # conflicting values within one file map to the ValueError sentinel
                  folderData[key] = ValueError
            metaData[folderName] = folderData
   finally:
      f.Close()
   return metaData
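
# The dictionary returned by extractMetaData maps folder name to a dict of
# key/value pairs, or to None for folders absent from the file; e.g. (with
# made-up content):
#
#    {'/TagInfo': {'AtlasRelease': 'AtlasOffline-21.0.X', 'beam_type': 'collisions'},
#     '/Generation/Parameters': None, ...}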


# merge two meta-data dictionaries
# Conflicting values are set to ValueError.
# Folders/values that exist in one but not the other are set to None.
def mergeMetaData(md1, md2):
   assert len(md1) == len(md2)
   md = {}
   for folderName, folderData1 in md1.iteritems():
      folderData2 = md2[folderName]
      if folderData1 is None or folderData2 is None:
         folderData = None
      else:
         folderData = dict( (key, None) for key in folderData1 )
         folderData.update( (key, None) for key in folderData2 )
         for key in folderData:
            if key in folderData1 and key in folderData2:
               if folderData1[key] == folderData2[key]:
                  folderData[key] = folderData1[key]
               else:
                  folderData[key] = ValueError
      md[folderName] = folderData
   return md
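
# Worked example with made-up values: merging
#    {'/X': {'a': 1, 'b': 2}}   with   {'/X': {'b': 3, 'c': 4}}
# yields
#    {'/X': {'a': None, 'b': ValueError, 'c': None}}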


# add derived meta-data for convenience
def augmentMetaData(metaData):
   derived = {}
   try:
      value = metaData['/Generation/Parameters']['HepMCWeightNames']
      mcWeightIndexByName = ast.literal_eval(value)
      mcWeightNames = []
      for name, index in mcWeightIndexByName.iteritems():
         if index >= len(mcWeightNames):
            # grow the list so that slot `index` exists
            mcWeightNames.extend(None for _ in xrange(len(mcWeightNames), index + 1))
         elif mcWeightNames[index] is not None:
            raise KeyError('multiple names for HepMCWeight index %d' % index)
         mcWeightNames[index] = name
      derived['HepMCWeightNames'] = ','.join(mcWeightNames)
   except Exception:
      # TODO proper error handling
      pass
   metaData['_derived'] = derived


# file-object wrapper around os.dup
def fdup(fileObject):
   fd2 = os.dup(fileObject.fileno())
   fileObject2 = None
   try:
      fileObject2 = os.fdopen(fd2, fileObject.mode, -1)
   finally:
      if fileObject2 is None:
         os.close(fd2)
   return fileObject2


# file-object wrapper around os.dup2
def fdup2(fileObject, fileObject2):
   os.dup2(fileObject.fileno(), fileObject2.fileno())


# write out meta-data as tab-separated plain text
def dumpPlain(metaData, out):
   for folderName in sorted(metaData):
      folderData = metaData[folderName]
      if folderData is None:
         continue
      for key in sorted(folderData):
         value = folderData[key]
         if isinstance(value, str):
            pass
         elif isinstance(value, numbers.Number):
            value = repr(value)
         else:
            # keys with conflicting/unrecognized values are completely ignored
            continue
         value = re.sub(r'[^\x21-\x7e]+', ' ', value).strip()
         out.write('%s\t%s\t%s\n' % (folderName, key, value))
   out.write('\n')
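
# Each record of the 'plain' format is one tab-separated line of the form
# '<folderName>\t<key>\t<value>', e.g. (with made-up values)
# '/TagInfo\tbeam_type\tcollisions', followed by one blank line at the end.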


# supported meta-data dumpers
_dumpers = {
      'plain': dumpPlain,
      'pickle': pickle.dump,
      'json': json.dump,
   }


def main(argv, out):
   parser = argparse.ArgumentParser()
   parser.add_argument('--format', dest='dumper', action='store', choices=_dumpers.keys())
   parser.add_argument('--pickle', dest='dumper', action='store_const', const='pickle')
   parser.add_argument('--files-from', '-T', dest='filesFrom', action='store')
   parser.add_argument('--output', dest='outputFile', action='store')
   parser.add_argument('files', action='store', nargs='*')
   parser.set_defaults(dumper='plain')
   options = parser.parse_args(argv[1:])

   paths = []
   if options.filesFrom:
      with open(options.filesFrom, 'r') as f:
         for line in f:
            paths.extend(filter(None, line.rstrip('\n\r').split(',')))
   if options.files:
      paths += options.files
   dumper = _dumpers[options.dumper]

   if not paths:
      parser.error('no input files given')
   metaData = extractMetaData(paths[0])
   for path in paths[1:]:
      metaData = mergeMetaData(metaData, extractMetaData(path))

   augmentMetaData(metaData)
   if options.outputFile:
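      # write into a temp file in the target directory, then hard-link it into
      # place so the output file never appears partially written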
      with tempfile.NamedTemporaryFile(dir=os.path.dirname(options.outputFile), delete=True) as f:
         dumper(metaData, f)
         f.flush()
         os.link(f.name, options.outputFile)
   else:
      dumper(metaData, out)
      out.flush()


# load ROOT in batch mode; skip loading any (possibly conflicting) ROOT dictionaries through rootlogon
def loadROOT():
   args = sys.argv[1:]
   try:
      sys.argv[1:] = '-b', '-l', '-n'
      import ROOT
      ROOT.gErrorIgnoreLevel = ROOT.kError
      globals()['ROOT'] = ROOT
   finally:
      sys.argv[1:] = args


# acquire a copy of stdout, then redirect stdout to /dev/null
# (so stray prints by ROOT don't mess up our dumps)
with fdup(sys.stdout) as out:
   with open(os.devnull, 'w') as nulout:
      fdup2(nulout, sys.stdout)
   loadROOT()
   main(sys.argv, out)