Forked from
atlas / athena
143634 commits behind the upstream repository.
-
Thijs Cornelissen authored
* Ignore JetMissingETWord in TAG * PROCTools-00-00-80 2015-04-15 Christian Ohm <christian.ohm@cern.ch> * Adjust to r20 logs to fix TCT comparisons * python/getFileLists.py * python/compareTCTs.py 2015-03-29 Thijs Cornelissen <thijs.cornelissen@cern.ch> * Change to gcc48 2015-03-14 Christian Ohm <christian.ohm@cern.ch> * python/UploadTfAMITag.py - further improvements and a bit of a clean-up 2015-02-27 Christian Ohm <christian.ohm@cern.ch> * python/UploadTfAMITag.py - remove use of AMI credentials, don't reset input file for q-tags
Thijs Cornelissen authored* Ignore JetMissingETWord in TAG * PROCTools-00-00-80 2015-04-15 Christian Ohm <christian.ohm@cern.ch> * Adjust to r20 logs to fix TCT comparisons * python/getFileLists.py * python/compareTCTs.py 2015-03-29 Thijs Cornelissen <thijs.cornelissen@cern.ch> * Change to gcc48 2015-03-14 Christian Ohm <christian.ohm@cern.ch> * python/UploadTfAMITag.py - further improvements and a bit of a clean-up 2015-02-27 Christian Ohm <christian.ohm@cern.ch> * python/UploadTfAMITag.py - remove use of AMI credentials, don't reset input file for q-tags
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
getFileLists.py 8.89 KiB
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
import os,sys
from xml.dom.minidom import parse
from time import time
import re
from string import *
def tctPath(build,rel):
    """Return the Tier0ChainTests result directory for a build and release.

    build -- build name, e.g. "20.1.X.Y"; names starting with "17." or
             "18." select the i686-slc5-gcc43-opt architecture, everything
             else x86_64-slc6-gcc48-opt
    rel   -- release name; "rel_" is prepended unless already present

    The returned path always ends with a slash.
    """
    release = rel if rel.startswith("rel_") else "rel_"+rel

    if build.startswith(("17.", "18.")):
        arch = "i686-slc5-gcc43-opt"
    else:
        arch = "x86_64-slc6-gcc48-opt"

    pieces = (
        "/afs/cern.ch/atlas/project/RTT/prod/Results/tct/",
        release,
        "/",
        build,
        "/build/",
        arch,
        "/offline/Tier0ChainTests/",
    )
    return "".join(pieces)
class TCTChainInfo:
    """Plain record describing one TCT chain found on disk.

    Attributes set by the constructor:
      directory  -- directory the chain's log file was found in
      logfile    -- path of the chain's log file
      eventlist  -- sequence of (run, event) pairs
      loglines   -- counter, starts at 0
      cpulist    -- starts as an empty tuple
      memlist    -- starts as an empty tuple
      outputDict -- starts as an empty dict, one per instance
    """

    def __init__(self,dir,log,runEventIn=()):
        """Store the location of the chain and reset all counters."""
        self.directory = dir
        self.logfile = log
        self.eventlist = runEventIn
        self.loglines = 0
        self.cpulist = ()
        self.memlist = ()
        self.outputDict = {}

    def addRunEvent(self,run,event):
        """Append the pair (run, event) to ``eventlist``."""
        pair = (run, event)
        self.eventlist += (pair,)
class findTCTFiles:
    """Pair up output files of a reference and a validation TCT run.

    Two directory trees are scanned for job log files (names ending in
    "_log" but not "_script_log").  Chains present in both trees are kept
    if the event counts reported in their log files agree, and output files
    of the surviving chains can then be matched by file name.

    The class avoids Python-2-only constructs (print statement, has_key,
    iteritems, os.path.walk) so that it behaves the same on Python 2.7 and
    Python 3.
    """

    def __init__(self,rDir,vDir,checkAge=False):
        """Remember the two directory trees to compare.

        rDir     -- root directory of the reference run
        vDir     -- root directory of the validation run
        checkAge -- stored as-is; no method of this class reads it
        """
        self._checkAge = checkAge
        self._rDir = rDir
        self._vDir = vDir
        # chain name -> (reference info,) or (reference info, validation info)
        self._commonDirs = dict()
        # Kept for backward compatibility; not used by the methods below.
        self._vFiles = []
        self._rFiles = []

    def checkFileAge(self,path):
        """Print a warning if *path* was modified more than 24 hours ago.

        A path that cannot be stat'ed is silently ignored.
        """
        try:
            fileTime = os.stat(path).st_mtime
        except (OSError, ValueError):
            return
        age = time() - fileTime
        if age > 86400:  # more than 24h
            print("WARNING! File %s" % path)
            print("is more than %d day(s) old" % int(age / 86400.))
        return

    def hasLogfile(self,ref,p,fl):
        """Register every job log file among the names *fl* of directory *p*.

        ref -- True while scanning the reference tree, False for validation
        p   -- directory being visited
        fl  -- names of the entries inside *p*

        The chain name is the file name without its "_log" suffix.  In
        reference mode a new chain is recorded (a repeated name is reported
        and ignored); in validation mode the info is appended only to chains
        already known from the reference tree.
        """
        for f in fl:
            if not f.endswith("_log") or f.endswith("_script_log"):
                continue
            logpath = p + '/' + f
            if not os.path.isfile(logpath):
                continue
            name = f[:-4]
            tci = TCTChainInfo(p, logpath)
            if ref:  # reference TCT (scanned first)
                if name in self._commonDirs:
                    print("ERROR: Duplicate directory name %s" % name)
                else:
                    self._commonDirs[name] = (tci,)
            elif name in self._commonDirs:  # validation: only known chains
                self._commonDirs[name] += (tci,)
        return

    def _scanTree(self, top, ref):
        """Call hasLogfile for every directory below *top* (inclusive).

        Replaces os.path.walk, which no longer exists in Python 3.  Like
        os.path.walk, os.walk visits directories top-down, does not descend
        into symbolic links and ignores unreadable directories.  Only the
        non-directory names are handed over because hasLogfile accepts
        regular files only.
        """
        for dirpath, _dirnames, filenames in os.walk(top):
            self.hasLogfile(ref, dirpath, filenames)

    def addNew(self,fileList,file):
        """Append *file* to *fileList* unless an equivalent entry exists.

        Two paths are equivalent when their last component and their
        third-to-last component are equal.  Paths with fewer than three
        components are compared with an empty third-to-last component
        instead of raising IndexError.  *fileList* is modified in place.
        """
        def identity(path):
            parts = path.split("/")
            stream = parts[-3] if len(parts) >= 3 else ""
            return (stream, parts[-1])

        newIdentity = identity(file)
        for old in fileList:
            if identity(old) == newIdentity:
                return
        fileList.append(file)
        return

    def findBetween(self, s, first, last ):
        """Return the part of *s* between *first* and the next *last*.

        Returns "" when either marker is missing.
        """
        try:
            start = s.index(first) + len(first)
            end = s.index(last, start)
        except ValueError:
            return ""
        return s[start:end]

    def getTCTChainInfo(self,tci):
        """Parse the log file of *tci* and collect validated event counts.

        Lines before the one containing "INFO Validating output files" are
        skipped.  After that, each line containing "Testing event count..."
        names an output, and the line following it is expected to carry
        "Event counting test passed (N events).".  The count N is stored in
        ``tci.outputDict`` under the output's name; if the following line
        does not carry a readable count the entry stays at -1.
        ``tci.loglines`` is incremented for every line read.

        Returns ``tci.outputDict``, or None if the log cannot be opened.
        """
        validationStartLine = "INFO Validating output files"
        countMarker = "Testing event count..."
        try:
            lf = open(tci.logfile, "r")
        except (IOError, OSError):
            return None

        foundFileValidationStart = False
        nextLineHasEventCount = False
        lastOutputFileName = ""
        # The with-block closes the log even if parsing fails half-way.
        with lf:
            for l in lf:
                tci.loglines += 1
                if not foundFileValidationStart:
                    if validationStartLine in l:
                        foundFileValidationStart = True
                elif countMarker in l:
                    fmt = self.findBetween(l, "INFO", countMarker).strip()
                    tci.outputDict[fmt] = -1
                    nextLineHasEventCount = True
                    lastOutputFileName = fmt
                elif nextLineHasEventCount:
                    count = self.findBetween(l, "Event counting test passed (", " events).")
                    try:
                        tci.outputDict[lastOutputFileName] = int(count)
                    except ValueError:
                        # No count on this line: keep the -1 placeholder
                        # instead of aborting the whole scan.
                        pass
                    nextLineHasEventCount = False
        return tci.outputDict

    def getChains(self):
        """Scan only the reference tree and return the chains found there."""
        print("Input directory: %s" % self._rDir)
        print("Searching for TCT sub-directories")
        self._scanTree(self._rDir, True)
        print("Found  %d directories" % len(self._commonDirs))
        return self._commonDirs

    def getCommonChains(self):
        """Return the chains whose reference and validation logs agree.

        Both trees are scanned, then every chain is dropped that
          * does not have exactly one reference and one validation log,
          * has a log without any validated output (reference or
            validation), or
          * reports different outputs / event counts in the two logs.

        Returns the dict chain name -> (reference info, validation info).
        """
        print("Seaching for compatible TCT directories ...")
        allEvents = 0
        self._scanTree(self._rDir, True)   # reference directory
        self._scanTree(self._vDir, False)  # validation directory

        # Iterate over a snapshot of the keys: entries are removed on the way.
        for tctname in list(self._commonDirs):
            tcis = self._commonDirs[tctname]
            if len(tcis) != 2:
                self._commonDirs.pop(tctname)
                continue
            ref, val = tcis

            refEvents = self.getTCTChainInfo(ref)
            if not refEvents:
                print("No events found in %s" % ref.logfile)
                self._commonDirs.pop(tctname)
                continue

            valEvents = self.getTCTChainInfo(val)
            if not valEvents:
                print("No events found in %s" % val.logfile)
                # Without validation counts the chain cannot be called
                # compatible, so it must be dropped like the reference case.
                self._commonDirs.pop(tctname)
                continue

            if valEvents == refEvents:
                # -1 marks an unreadable count and must not reduce the total.
                allEvents += sum(n for n in refEvents.values() if n > 0)
                print("TCT %s output seems compatible for ref and chk:" % tctname)
                for fmt in refEvents:
                    print("%-70s: ref: %d events, val: %d events" % (fmt, refEvents[fmt], valEvents[fmt]))
            else:
                print("TCT %s is NOT compatible, outputs different number of events for at least one format:" % tctname)
                # Also list outputs present on one side only; indexing the
                # other dict directly would raise KeyError for them.
                formats = list(refEvents) + [f for f in valEvents if f not in refEvents]
                for fmt in formats:
                    print(" %s, ref: %s, val: %s" % (fmt, refEvents.get(fmt, "missing"), valEvents.get(fmt, "missing")))
                # don't compare the files for this then!
                self._commonDirs.pop(tctname)

        print("Found %i compatible TCT chains with at total of %i events" % (len(self._commonDirs), allEvents))
        return self._commonDirs

    def findFilesInDir(self,dir,filename):
        """Return the files of directory *dir* whose name matches *filename*.

        filename -- regular expression, searched anywhere in the name

        If the directory contains "rttjobinfo.xml", matching archive
        destinations listed in it are added as well.  Duplicates are
        filtered through addNew.
        """
        pattern = re.compile(filename)
        result = []
        ls = os.listdir(dir)
        for f in ls:
            if pattern.search(f):
                self.addNew(result, dir + "/" + f)
        if "rttjobinfo.xml" in ls:
            for f in self.extractCastorPath(dir + "/rttjobinfo.xml", pattern):
                self.addNew(result, f)
        return result

    def findFiles(self,pattern):
        """Return matching (reference, validation) file pairs per chain.

        pattern -- regular expression selecting the files of interest

        If no chains are known yet, getCommonChains is run first.  For
        every chain, each reference file is paired with the first
        validation file that has the same base name.

        Returns a dict chain name -> tuple of (reference path, validation
        path) pairs.
        """
        result = dict()
        if len(self._commonDirs) == 0:
            self.getCommonChains()

        for name, tcis in self._commonDirs.items():
            if len(tcis) != 2:
                # e.g. after getChains(): no validation partner to pair with
                continue
            ref, val = tcis
            reffiles = self.findFilesInDir(ref.directory, pattern)
            valfiles = self.findFilesInDir(val.directory, pattern)

            # base name -> first validation file carrying that name
            firstByName = {}
            for vf in valfiles:
                firstByName.setdefault(vf.split("/")[-1], vf)

            pairs = ()
            for rf in reffiles:
                rfN = rf.split("/")[-1]
                if rfN in firstByName:
                    pairs += ((rf, firstByName[rfN]),)
            result[name] = pairs
        return result

    def extractCastorPath(self,rttxmlfile,pattern):
        """Return the archive destinations in *rttxmlfile* matching *pattern*.

        rttxmlfile -- path of an XML file containing <archivefile> elements,
                      each with a <destination> child holding the path
        pattern    -- compiled regular expression, searched in the path

        <archivefile> elements without destination text are skipped.
        """
        res = []
        dom = parse(rttxmlfile)
        try:
            for af in dom.getElementsByTagName("archivefile"):
                cpEle = af.getElementsByTagName("destination")
                if not cpEle or not cpEle[0].childNodes:
                    continue
                data = getattr(cpEle[0].childNodes[0], "data", None)
                if data is None:
                    continue
                castorpath = data.strip()
                if pattern.search(castorpath):
                    res.append(castorpath)
        finally:
            # minidom trees contain reference cycles; release them explicitly
            dom.unlink()
        return res
if __name__=="__main__":
    # Command-line entry point: print the matching reference/validation
    # file pairs of two TCT runs.
    # Arguments: <ref build> <ref release> <val build> <val release> <regex>
    # print() is called with a single pre-formatted string so that the
    # output is the same on Python 2 and Python 3.

    def usage():
        """Print a short usage message."""
        print("Find compatible pairs of TCT files")
        print("Usage example")
        print("%s 15.6.X.Y rel_1 15.6.X.Y-VAL rel_2 'myTAGCOMM.*root'" % sys.argv[0])

    if len(sys.argv) != 6:
        usage()
        sys.exit(-1)

    rPath = tctPath(sys.argv[1], sys.argv[2])
    vPath = tctPath(sys.argv[3], sys.argv[4])

    if not os.access(rPath, os.R_OK):
        print("Can't access output of reference TCT at %s" % rPath)
        sys.exit(-1)

    if not os.access(vPath, os.R_OK):
        # This is the validation path; the message used to say "reference".
        print("Can't access output of validation TCT at %s" % vPath)
        sys.exit(-1)

    pattern = re.compile(sys.argv[5])

    ff = findTCTFiles(rPath, vPath)
    # Called for its side effect of populating the chain table in ff.
    tctlist = ff.getCommonChains()
    res = ff.findFiles(pattern)
    for name, fl in res.items():
        print(name)
        for (r, v) in fl:
            print("Ref: %s" % r)
            print("Val: %s" % v)