Commit 4f16d10f authored by Louie

sync with systematicsTool hash feb8fabfa0e7a38d335bdf286f1fe66d1e33ad27

parent e045a68e
@@ -7,10 +7,10 @@ from ROOT import gDirectory
def getListOfKeys(tf):
return [i for i in tf.GetListOfKeys()]
def doMergeTTree(v):
res = None
trees = []
prevDir = gDirectory.GetDirectory('')
for ao, w in v:
newfile = r.TFile(str(abs(w))+".root", "RECREATE")
@@ -18,17 +18,17 @@ def doMergeTTree(v):
output = r.addBranch(res, w, "1.")
output.Write()
newfile.Write()
trees.append(newfile)
newfile.Close()
outputChain = r.TChain(str(ao.GetName()))
for i in trees:
outputChain.Add(i.GetName())
mergeOutput = "merge.root"
outputChain.Merge(mergeOutput)
prevDir.cd()
if outputChain.GetNtrees():
cloneTree = outputChain.CloneTree(-1,"fast")
cloneTree.Write()
for i in trees:
if os.path.exists(i.GetName()):
@@ -36,50 +36,50 @@ def doMergeTTree(v):
if os.path.exists(mergeOutput):
os.remove(mergeOutput)
return cloneTree
def doMergeTH(v):
res = None
for ao, w in v:
if res is None:
res = ao.Clone()
#res.Sumw2()
res.Scale(w)
else:
res.Add(ao, w)
return res
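# Illustrative semantics of doMergeTH (h1, h2 are hypothetical TH1s):
#   doMergeTH([(h1, 0.5), (h2, 2.0)]) clones h1, scales the clone by 0.5, then
#   adds h2 with weight 2.0 via TH1::Add, i.e. the result is 0.5*h1 + 2.0*h2.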
def doMergeTGraph(v):
res = None
aotype = None
for ao, w in v:
if res is None:
res = ao.Clone()
for i in range(res.GetN()):
res.SetPoint(i, res.GetX()[i], ao.GetY()[i]*w)
yerr_hi=(0+ (w*ao.GetErrorYhigh(i))**2)**0.5
if "TGraphErrors"==aotype:
yerr=(0+ (w*ao.GetErrorY(i))**2)**0.5
res.SetPointError(i, res.GetErrorX(i), yerr)
if "TGrapAsymmhErrors"==aotype:
yerr_lo=(0+ (w*ao.GetErrorYlow(i))**2)**0.5
yerr_hi=(0+ (w*ao.GetErrorYhigh(i))**2)**0.5
res.SetPointError(i, res.GetErrorXlow(i), res.GetErrorXhigh(i), yerr_lo, yerr_hi)
aotype = res.ClassName()
else:
assert ao.GetN()==res.GetN()
for i in range(res.GetN()):
assert ao.GetX()[i]==res.GetX()[i]
res.SetPoint(i, res.GetX()[i], res.GetY()[i]+(w*ao.GetY()[i]))
if "TGraphErrors"==aotype:
yerr=((res.GetErrorY(i))**2 + (w*ao.GetErrorY(i))**2)**0.5
res.SetPointError(i, res.GetErrorX(i), yerr)
if "TGrapAsymmErrors"==aotype:
yerr_hi=((res.GetErrorYhigh(i))**2 + (w*ao.GetErrorYhigh(i))**2)**0.5
yerr_lo=((res.GetErrorYlow(i))**2 + (w*ao.GetErrorYlow(i))**2)**0.5
res.SetPointError(i, res.GetErrorXlow(i), res.GetErrorXhigh(i), yerr_lo, yerr_hi)
return res
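# The error handling above is a quadrature sum: for graphs g_i merged with weights w_i,
# each point gets y = sum_i w_i*y_i and sigma_y = sqrt(sum_i (w_i*sigma_y_i)**2),
# applied separately to the low and high errors in the asymmetric case.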
def doMerge(v):
@@ -91,7 +91,7 @@ def doMerge(v):
else:
return None
addBranchCode = """
TTree * addBranch(TTree* tree, float normalization, TString selection="1"){
TTree * newtree = tree->CopyTree(selection);
@@ -99,7 +99,7 @@ TTree * addBranch(TTree* tree, float normalization, TString selection="1"){
TBranch * bnormweight = newtree->Branch("normweight",&normweight,"normweight/F");
int nevents = newtree->GetEntries();
for (Long64_t i = 0; i < nevents; i++) {
newtree->GetEntry(i);
bnormweight->Fill();
}
@@ -113,21 +113,21 @@ def main(argv):
The haddw executable hadds simple ROOT objects (currently only TH* and TGraph* objects), where each input file can be weighted by a float
"""
parser = optparse.OptionParser( usage = "%prog [options] output.root input1:weight1 input2:weight2 .. " )
parser.add_option("-f","--filter", help="filter: only merge keys matching this regex", dest="filter", default=None)
parser.add_option("-e","--exclude", help="exclude/veto keys matching this regex", dest="exclude", default=None)
parser.add_option("-r","--replace", help="findString:replaceString. In the analysis object names, replace findString with replaceString ", dest="replace", default=None)
parser.add_option("-f","--filter", help="filter: only merge keys matching this regex", dest="filter", default = None)
parser.add_option("-e","--exclude", help="exclude/veto keys matching this regex", dest="exclude", default = None)
parser.add_option("-r","--replace", help="findString:replaceString. In the analysis object names, replace findString with replaceString ", dest="replace", default = None)
(opts, args) = parser.parse_args()
r.TH1.SetDefaultSumw2()
opts.replace = opts.replace.split(":") if opts.replace is not None else None  # avoid AttributeError when --replace is not given
outputFile = args[0]
inputFiles={}
for i in range (1, len(args)):
if not ":" in args[i]:
if ":" not in args[i]:
print("warning, you did not specify a weight for inputfile", args[i], " assuming 1.")
f, w = args[i], 1.
else:
f, w = args[i].split(":")
assert(os.path.isfile(f))
assert(f not in inputFiles.keys())
@@ -136,29 +136,29 @@ def main(argv):
outputKeys={}
for inF in inputFiles.keys():
f = inputFiles[inF]['file']
w = inputFiles[inF]['weight']
for key in getListOfKeys(f):
if f.Get(key.GetName()).InheritsFrom("TH1"):
sumw = f.Get(key.GetName()).Integral()
if (opts.filter is not None):
if (not key.GetName() in re.findall(opts.filter, key.GetName())): continue
if (opts.exclude is not None):
if (key.GetName() in re.findall(opts.exclude, key.GetName())): continue
if f.Get(key.GetName()).InheritsFrom("TTree"): #if merging root trees, remember to divide the weight by the sum of weights for each DSID!
outputKeys.setdefault(key.GetName(),[]).append([f.Get(key.GetName()), w/sumw])
else:
outputKeys.setdefault(key.GetName(),[]).append([f.Get(key.GetName()), w])
of = r.TFile.Open(outputFile, "RECREATE")
for k, v in outputKeys.items():
res = doMerge(v)
if opts.replace is not None:
res.SetName(res.GetName().replace(opts.replace[0], opts.replace[1]))
res.SetTitle(res.GetTitle().replace(opts.replace[0], opts.replace[1]))
res.Write()
if __name__ == "__main__":
......
#!/usr/bin/env python
# Author: Abhishek Nag (TU Dresden)
# email: abhishek.nag@cern.ch
import os
import sys
import yaml
import argparse
SystToolsPath="/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/SystematicsDatabase/"
if os.path.exists(SystToolsPath+"DSID_Database.yaml"):
SystToolsPath = "/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/SystematicsDatabase/"
if os.path.exists(SystToolsPath + "DSID_Database.yaml"):
pass
#print("[INFO] found syst database path at %s"%SystToolsPath )
elif 'SYSTTOOLSPATH' in os.environ.keys():
print("[WARNING] Could not find syst tools path on cvfms, defaulting to one in %s/data/"%os.environ['SYSTTOOLSPATH'])
SystToolsPath='/data/'+os.environ['SYSTTOOLSPATH']
print("[WARNING] Could not find syst tools path on cvfms, defaulting to one in %s/data/" % os.environ['SYSTTOOLSPATH'])
SystToolsPath = '/data/' + os.environ['SYSTTOOLSPATH']
else:
print("[ERROR] Environment variable SYSTTOOLSPATH is not set. It should be set to the systematics-tools directory. Use setupSystematicsTool.sh")
exit(1)
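# A typical setup outside cvmfs (path illustrative) would be along the lines of:
#   export SYSTTOOLSPATH=/path/to/systematics-tools
#   source setupSystematicsTool.sh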
def getWeights(dsid):
info = {}
keys = []
with open("%s/DSID_Database.yaml" % SystToolsPath) as f:
database = yaml.safe_load(f)
if any(str(dsid) == k for k in database.keys()):
keys = database.get(str(dsid))
# print(keys)
if keys == ['nominal']:
info.update({"nominal":"nominal"})
info.update({"nominal": "nominal"})
else:
with open('%s/Weight_Database.yaml' % SystToolsPath) as d:
weight_data = yaml.safe_load(d)
for key in keys:
if key in weight_data.keys():
info.update({key: weight_data.get(key)})
else:
print(key + ' key for ' + str(dsid) + ' not in Weight Database')
else:
print((str(dsid) + ' not in Database'))
return info, keys
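# Sketch of the expected usage (410425 matches the CLI default below; the exact
# shapes of info/keys depend entirely on the YAML databases):
#   info, keys = getWeights(410425)
# keys lists the weight-group names recorded for the DSID; info maps each name to
# its Weight_Database.yaml entry, or is {"nominal": "nominal"} for nominal-only samples.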
def getOrderedWeights(dsid):
res = {}
for fn in os.listdir("%s/Weight_files" % SystToolsPath):
print(fn, str(dsid), (str(dsid) in fn))
if str(dsid) in fn:
with open("%s/Weight_files/%s"%(SystToolsPath,fn)) as f:
counter=0
with open("%s/Weight_files/%s" % (SystToolsPath, fn)) as f:
counter = 0
for line in f.readlines():
line = line.strip()
res[line] = counter
counter += 1
break
if len(res) == 0:
print((str(dsid) + ' not in Database'))
return res
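# Illustrative result (weight names hypothetical): each line of the DSID's file in
# Weight_files/ becomes a key mapped to its zero-based line index, e.g.
#   getOrderedWeights(410425) -> {"nominal": 0, "muR0p5_muF0p5": 1, ...}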
def main(argv):
parser = argparse.ArgumentParser(description='Variation string needed')
parser.add_argument('-d', '--dsid', default=410425, type=int, help='six digit DSID')
args = parser.parse_args()
dictionary, keys = getWeights(args.dsid)
print(keys)
print(dictionary)
if __name__ == "__main__":
main(sys.argv[1:])
#!/usr/bin/env python
# Author: Abhishek Nag (TU Dresden)
# email: abhishek.nag@cern.ch
import os
import sys
import optparse
from itertools import chain
sys.path.append(os.path.expandvars('$TestArea/../'))  # expandvars: a literal '$TestArea' string would never resolve on sys.path
import readDatabase
def file_is_empty(path):
return os.stat(path).st_size == 0
def getDAOD(list_DAOD):
DAOD_types = []
for d in list_DAOD:
DAOD_types.append(d.split('.')[4])
short_DAOD = list(set(DAOD_types))
return short_DAOD
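# Example of the field convention assumed here (a made-up but typically-shaped DID):
#   "mc16_13TeV.410470.PhPy8EG_ttbar.deriv.DAOD_TOPQ1.e6337_s3126_r9364_p3629"
# splits on '.' so that field 4 is "DAOD_TOPQ1"; getDAOD therefore returns the
# deduplicated derivation formats present in the rucio listing.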
def validate(testSampleDir, thisSampleName, testSamplePath, weight_database, outputSamples):
for dirName in os.listdir(testSampleDir):
if thisSampleName.split(":")[1] in dirName:
testSamplePath = testSampleDir + "/" + dirName
if (testSamplePath is None):
print("[INFO] do not currently have a test sample for ", thisSampleName)
rucioCommand= "rucio download --nrandom 1 %s --dir %s " % (thisSampleName, opts.testSampleDir)
print("[INFO] --> downloading one using this command \n ",rucioCommand)
rucioCommand = "rucio download --nrandom 1 %s --dir %s " % (thisSampleName, opts.testSampleDir)
print("[INFO] --> downloading one using this command \n ", rucioCommand)
os.system(rucioCommand)
testSamplePath = testSampleDir + "/" + thisSampleName.split(":")[1]
if not os.path.exists(testSamplePath):
print("[ERROR INFO] No such directory: %s OR downloading Failed" % (testSamplePath))
return
if not os.listdir(testSamplePath):
print("[ERROR INFO] Downloading Failed for ", testSamplePath)
rmcomand="rm -r %s" % (testSamplePath)
return
for fileName in os.listdir(testSamplePath):
if fileName.endswith('part'):
print("[ERROR INFO] Downloading Failed for ", testSamplePath)
rmdir="rm -r %s" % (testSamplePath)
rmdir = "rm -r %s" % (testSamplePath)
os.system(rmdir)
return
testSamplePath = testSamplePath + "/" + fileName
break
print("[SUCCESS] found test file ", testSamplePath)
athenaCommand= "athena --filesInput=%s --evtMax=1 MyPackage/MyPackageAlgJobOptions.py" % (testSamplePath)
athenaCommand = "athena --filesInput=%s --evtMax = 1 MyPackage/MyPackageAlgJobOptions.py" % (testSamplePath)
print("[INFO] running athena weight retrieving tool using \n", athenaCommand)
os.system(athenaCommand)
if not os.path.exists('weight.txt'):
print('[INFO] Athena Tool failed to retrieve weights')
return
wfile = open('weight.txt', 'r')
ofile = open(outputSamples, 'a+')
weight_file = wfile.read().splitlines()
if set(weight_database) == set(weight_file):
print('%s is Validated' % (thisSampleName))
ofile.write(thisSampleName.split(":")[1] + ' ' + 'PASSED' + '\n')
else:
print('%s is NOT Validated' % (thisSampleName))
ofile.write(thisSampleName.split(":")[1] + ' ' + 'FAILED' + '\n')
wfile.close()
ofile.close()
rmcommand="rm weight.txt"
rmcommand = "rm weight.txt"
os.system(rmcommand)
rmsample="rm -r %s" % (testSampleDir+"/"+thisSampleName.split(":")[1])
rmsample = "rm -r %s" % (testSampleDir + "/" + thisSampleName.split(":")[1])
os.system(rmsample)
parser = optparse.OptionParser(usage="%prog [options]")
parser.add_option("-i", "--inputSamples", help="list of samples. One per line", dest="inputSamples", default="../toValidate_samples.txt")
parser.add_option("--testSampleDir", help="where to store the test samples.", dest="testSampleDir", default="../validSamples")
parser.add_option("-o", "--outputSamples", help="list of validated samples. One per line, with tag as PASSED/FAILED", dest="outputSamples", default="../validated_samples.txt")
(opts, args) = parser.parse_args()
os.system("mkdir -p %s" % opts.testSampleDir)
f = open(opts.inputSamples)
for line in f.readlines():
weight_database = []
if not line.strip() or line.startswith("#"): continue  # skip blank lines and comments
SampleName = line.split()[0]
dsid = SampleName.split(".")[1]
dictionary, keys = readDatabase.getWeights(dsid)
if keys == []:
print('[FAIL] DSID %s not found in Database' % (dsid))
continue
for key in keys:
weight_database.append(dictionary[key]['weights'])
weight_database = list(chain.from_iterable(weight_database))
testSamplePath = None
if 'DAOD' in SampleName:
ofile = open(opts.outputSamples, 'a+')
ofile.seek(0)  # 'a+' opens positioned at EOF; rewind so the check below sees existing entries
if any(SampleName.split(":")[1] in oline for oline in ofile.readlines()):
print('[INFO] sample already checked')
continue
ofile.close()
validate(opts.testSampleDir, SampleName, testSamplePath, weight_database, opts.outputSamples)
continue
ruciolist = "rucio list-dids %s*DAOD* --filter type=CONTAINER --short > list_DAOD.txt" % (SampleName.split("_")[0]+'_'+SampleName.split("_")[1]+'_'+SampleName.split("_")[2])
ruciolist=ruciolist.replace('mc15','mc16')
ruciolist = "rucio list-dids %s*DAOD* --filter type = CONTAINER --short > list_DAOD.txt" % (SampleName.split("_")[0] + '_' + SampleName.split("_")[1] + '_' + SampleName.split("_")[2])
ruciolist = ruciolist.replace('mc15', 'mc16')
print("[INFO] rucio listing DAODs using this command \n ", ruciolist)
os.system(ruciolist)
if file_is_empty('list_DAOD.txt'):
print('No DAODs found for the given sample: ', SampleName)
continue
lfile = open('list_DAOD.txt', 'r')
list_DAOD = lfile.readlines()
shortDAOD = getDAOD(list_DAOD)
for DAOD in shortDAOD:
d_list = list(element for element in list_DAOD if DAOD in element)
d_list.sort()
thisSampleName = d_list[-1].split()[0]
testSamplePath = None
ofile = open(opts.outputSamples, 'a+')
ofile.seek(0)  # rewind as above so readlines() sees the file's existing contents
if any(thisSampleName.split(":")[1] in oline for oline in ofile.readlines()):
print('[INFO] sample already checked')
continue
......
@@ -98,8 +98,10 @@ See the source of this script (e.g. use 'less `which %(prog)s`) for more discuss
# use this script as a template around which to write logic that satisfies your
# particular requirements.
import yoda
import argparse
import sys
import math
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument("INFILES", nargs="+", help="datafile1 datafile2 [...]")
@@ -120,21 +122,19 @@ parser.add_argument("--no-veto-empty", action="store_false", default=True, dest=
parser.add_argument("--assume-normalized", action="store_true", default=False, dest="ASSUME_NORMALIZED",
help="DEPRECATED, AND DOES NOTHING. This option _used_ to bypass the detection heuristic for unnormalized histograms")
parser.add_argument("-m", "--match", action="append", dest="PATHPATTERNS", default=[],
help="only write out histograms whose $path/$name string matches any of these regexes")
help="only write out histograms whose $path/$name string matches any of these regexes")
parser.add_argument("-M", "--unmatch", action="append", dest="PATHUNPATTERNS", default=[],
help="exclude histograms whose $path/$name string matches any of these regexes")
help="exclude histograms whose $path/$name string matches any of these regexes")
args = parser.parse_args()
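# Per-file scale factors ride along on the command line (files and factors illustrative):
#   yodamerge run1.yoda run2.yoda:0.5 ...
# merges the inputs with run2's contributions scaled by 0.5, via the rsplit(":") parsing below.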
# Include scatters in "add" mode
if args.STACK:
args.S1D_MODE = "add"
args.S2D_MODE = "add"
args.S3D_MODE = "add"
# Put the incoming objects into a dict from each path to a list of histos and scalings
analysisobjects_in = {}
for fa in args.INFILES:
filename, scale = fa, 1.0
@@ -142,21 +142,20 @@ for fa in args.INFILES:
try:
filename, scale = fa.rsplit(":", 1)
scale = float(scale)
except Exception:
sys.stderr.write("Error processing arg '%s' with file:scale format\n" % fa)
aos = yoda.read(filename, patterns=args.PATHPATTERNS, unpatterns=args.PATHUNPATTERNS)
for aopath, ao in aos.items():
ao.setAnnotation("yodamerge_scale", scale)
analysisobjects_in.setdefault(aopath, []).append(ao)
analysisobjects_out = {}
for p, aos in analysisobjects_in.items():
# Identify the canonical aotype being handled from the type of the first entry in aos
aotype = type(aos[0])
# Check that types match, and just output the first one if they don't
if not all(type(ao) is aotype for ao in aos):
msg = "WARNING: cannot merge mismatched analysis object types for path %s: " % p
scatter_fail = False
@@ -180,32 +179,30 @@ for p, aos in analysisobjects_in.items():
analysisobjects_out[p] = aos[0]
continue
# Remove empty fillable data objects, to avoid gotchas where e.g. histos are normalised and hence
# ScaledBy should be set... but isn't because the emptiness blocked rescaling to finite area
if args.VETO_EMPTY:
# TODO: Add a Fillable interface/ABC and use that for the type matching
if aotype in (yoda.Counter, yoda.Histo1D, yoda.Histo2D, yoda.Profile1D, yoda.Profile2D):
aos_nonzero = [ao for ao in aos if ao.sumW() != 0]  # < possible that this doesn't mean no fills :-/
# Just output the first histo if they are all empty
if not aos_nonzero:
analysisobjects_out[p] = aos[0]
continue
# Reset aos to only contain non-empty ones
aos = aos_nonzero
# Counter, Histo and Profile (i.e. Fillable) merging
# TODO: Add a Fillable interface/ABC and use that for the type matching
if aotype in (yoda.Counter, yoda.Histo1D, yoda.Histo2D, yoda.Profile1D, yoda.Profile2D):