Commit ad0b4e56 authored by Andrea Sciaba's avatar Andrea Sciaba
Browse files

First version

parent 8c37eacb
#!/usr/bin/env python
"""
_PSetTweaks_
Record a set of tweakable parameters from a CMSSW Configuration in a CMSSW
independent python structure
"""
import inspect
import pickle
import StringIO
import imp
import sys
#py2.6 compatibility
try:
import json
except ImportError, ex:
import simplejson as json
class PSetHolder(object):
    """
    _PSetHolder_

    Minimal stand-in for a PSet, used to build the tree of a tweak so
    that it mirrors the structure of the configuration.

    Attributes set by the constructor:
      psetName_   -- the name this holder was created with
      parameters_ -- list that collects the names of the parameters
                     stored on this holder (initially empty)

    NOTE: PSetTweak.pythonise copies the source of this class verbatim
    into the files it generates, so the class must not reference any
    other module level name.
    """
    def __init__(self, psetName):
        # every instance gets its own, initially empty, parameter list
        self.parameters_ = list()
        self.psetName_ = psetName
#  //
# // Assistant helper functions
#//
def childPSets(x):
    """
    _childPSets_

    Return the list of attribute values of x that are PSetHolder
    instances. The check is done on the class name so that holders
    defined in another module (eg a tweak file loaded from disk) are
    recognised as well.
    """
    return [value for value in x.__dict__.values()
            if value.__class__.__name__ == "PSetHolder"]


def childParameters(p, x):
    """
    _childParameters_

    Return the names listed in x.parameters_, each one prefixed with
    the path p as "p.name".
    """
    return ["%s.%s" % (p, i) for i in x.parameters_]


def recursiveGetattr(obj, attr):
    """
    _recursiveGetattr_

    Resolve the "." delimited attribute path attr starting from obj,
    eg recursiveGetattr(obj, "a.b") returns obj.a.b
    Raises AttributeError if any step of the path is missing.
    """
    # reduce is no longer a builtin in python3, functools provides it
    # for python2.6 onwards as well
    from functools import reduce
    return reduce(getattr, attr.split("."), obj)
def parameterIterator(obj):
    """
    _parameterIterator_

    Util to iterate through the parameters in a PSetHolder.

    Yields the value of every parameter whose name is listed in
    obj.parameters_, in the order in which the names are listed.
    """
    # The names in parameters_ are plain attribute names of obj, so they
    # can be passed to getattr directly. (The previous implementation
    # called the two argument childParameters helper with one argument
    # and therefore always failed with a TypeError.)
    for name in obj.parameters_:
        yield getattr(obj, name)
def psetIterator(obj):
    """
    _psetIterator_

    Generator over the direct child PSets of a PSetHolder, ie the
    attribute values of obj that childPSets reports as PSetHolder
    instances.
    """
    children = childPSets(obj)
    for child in children:
        yield child
class PSetLister:
    """
    _PSetLister_

    Operator to decompose the PSet structure into a
    more sequence like layout.

    After calling an instance on a PSetHolder:
      psets      -- list of the "." delimited paths of every PSet visited,
                    parents before their children
      parameters -- dictionary mapping each PSet path to the list of the
                    full parameter paths held by that PSet
    """
    def __init__(self):
        self.psets = []
        self.parameters = {}
        # names of the PSets between the root and the one being visited
        self.queue = []

    def __call__(self, pset):
        """
        _operator(PSetHolder)_

        recursively traverse all parameters in this and all child
        PSets
        """
        self.queue.append(pset.psetName_)
        psetPath = ".".join(self.queue)
        self.psets.append(psetPath)
        self.parameters[psetPath] = childParameters(psetPath, pset)
        # explicit loop instead of map(): map is lazy in python3, so the
        # children would never be visited there
        for child in childPSets(pset):
            self(child)
        self.queue.pop(-1)
class JSONiser:
    """
    _JSONiser_

    Util class to build a json dictionary structure from the PSet tree
    and also recover the parameters of the tree from a json structure.

    Attributes:
      json       -- nested dictionary built by calling the instance on a pset
      parameters -- dictionary of full parameter path to value, filled
                    by dejson
      queue      -- keys of the dictionaries between the root and the one
                    currently being decoded by dejson
    """
    def __init__(self):
        self.json = {}
        self.queue = []
        self.parameters = {}

    def __call__(self, pset, parent = None):
        """
        _operator(pset)_

        operate on pset and substructure to build a json dictionary.

        The entry for pset is created in parent (self.json when no parent
        is given) under the name of the pset. It holds one key per
        parameter plus a 'parameters_' key listing the parameter names,
        and one nested dictionary per child pset.
        """
        if parent is None:
            parent = self.json
        thisPSet = parent.get(pset.psetName_, None)
        if thisPSet is None:
            thisPSet = {}
            parent[pset.psetName_] = thisPSet
        for param in pset.parameters_:
            thisPSet[param] = getattr(pset, param)
        thisPSet['parameters_'] = pset.parameters_
        for child in childPSets(pset):
            self(child, thisPSet)

    def dejson(self, dictionary):
        """
        _dejson_

        Walk the json structure and record every parameter found in
        self.parameters, keyed by its "." delimited path.

        The names in the 'parameters_' entry of a dictionary identify its
        parameters; every value that is a dictionary (or an instance of a
        dict subclass) is descended into, using its key as the next
        element of the path.
        """
        params = dictionary.get('parameters_', [])
        prefix = ".".join(self.queue)
        for param in params:
            self.parameters["%s.%s" % (prefix, param)] = dictionary[param]
        for key, value in dictionary.items():
            # isinstance rather than an exact type match, so that mapping
            # types derived from dict are decoded too
            if isinstance(value, dict):
                self.queue.append(key)
                self.dejson(value)
                self.queue.pop(-1)
class PSetTweak:
    """
    _PSetTweak_

    Template object listing the parameters to be edited.
    Also provides serialisation functionality and defines the
    process + tweak operator to apply the tweaks to a process.

    The parameters are stored in a tree of PSetHolder objects rooted at
    self.process.
    """
    def __init__(self):
        self.process = PSetHolder("process")

    def addParameter(self, attrName, value):
        """
        _addAttribute_

        Add an attribute as process.pset1.pset2.param = value
        Value should be the appropriate python type

        Intermediate PSets that do not exist yet are created. Setting a
        parameter that already exists replaces its value and leaves a
        single entry for it in the parameter list of its PSet.
        """
        currentPSet = None
        paramList = attrName.split(".")
        lastIndex = len(paramList) - 1
        for index, param in enumerate(paramList):
            if param == "process":
                currentPSet = self.process
            elif index < lastIndex:
                # intermediate element of the path: descend, creating
                # the PSet on the way if needed
                if not hasattr(currentPSet, param):
                    setattr(currentPSet, param, PSetHolder(param))
                currentPSet = getattr(currentPSet, param)
            else:
                # last element of the path: the parameter itself
                setattr(currentPSet, param, value)
                if param not in currentPSet.parameters_:
                    currentPSet.parameters_.append(param)

    def getParameter(self, paramName):
        """
        _getParameter_

        Get value of the parameter with the name given of the
        form process.module...

        Raises RuntimeError if the name does not start with process.
        """
        if not paramName.startswith("process"):
            msg = "Invalid Parameter Name: %s\n" % paramName
            msg += "Parameter must start with process"
            raise RuntimeError(msg)
        return recursiveGetattr(self, paramName)

    def __iter__(self):
        """
        _iterate_

        Loop over all parameters in the tweak, returning the
        parameter name as a . delimited path and the value
        """
        lister = PSetLister()
        lister(self.process)
        for pset in lister.psets:
            for param in lister.parameters[pset]:
                yield param, self.getParameter(param)

    def psets(self):
        """
        _psets_

        Generator function to yield the PSets in the tweak, as
        . delimited paths
        """
        lister = PSetLister()
        lister(self.process)
        for pset in lister.psets:
            yield pset

    def __str__(self):
        """string repr for debugging etc"""
        return "".join("%s = %s\n" % (x, y) for x, y in self)

    def setattrCalls(self, psetPath):
        """
        _setattrCalls_

        Generate the setattr calls needed to create each PSetHolder along
        the . delimited psetPath. Used for generating python format.

        Returns a dictionary mapping the path of each PSet below the
        first element of psetPath to the line of python that attaches it
        to its parent.
        """
        result = {}
        parent = None
        for pset in psetPath.split("."):
            if parent is None:
                # first element: it is the root, nothing to attach it to
                current = pset
            else:
                current = "%s.%s" % (parent, pset)
                result[current] = "setattr(%s, \"%s\", PSetHolder(\"%s\"))" % (
                    parent, pset, pset)
            parent = current
        return result

    def pythonise(self):
        """
        _pythonise_

        return this object as python format: the source of PSetHolder,
        the calls that rebuild the PSet structure and one setattr call
        per parameter.
        """
        src = inspect.getsourcelines(PSetHolder)
        result = "".join(src[0])
        result += "\n\n"
        result += "# define PSet Structure\n"
        result += "process = PSetHolder(\"process\")\n"
        setattrCalls = {}
        for pset in self.psets():
            setattrCalls.update(self.setattrCalls(pset))
        # sorted by path, so that a PSet is always created before its
        # children
        for call in sorted(setattrCalls):
            if call == "process":
                continue
            result += "%s\n" % setattrCalls[call]
        result += "# set parameters\n"
        for param, value in self:
            psetName, paramName = param.rsplit(".", 1)
            # type(u"") is unicode in python2 and str in python3
            if isinstance(value, (str, type(u""))):
                # repr gives a correctly quoted and escaped literal even
                # if the string contains quotes or backslashes
                value = repr(value)
            result += "setattr(%s, \"%s\", %s)\n" % (
                psetName, paramName, value)
            result += "%s.parameters_.append(\"%s\")\n" % (psetName, paramName)
        return result

    def jsonise(self):
        """
        _jsonise_

        return json format of this tweak
        """
        return json.dumps(self.jsondictionary())

    def jsondictionary(self):
        """
        _jsondictionary_

        return the json layout dictionary, rather than stringing it
        """
        jsoniser = JSONiser()
        jsoniser(self.process)
        return jsoniser.json

    def persist(self, filename, format = "python"):
        """
        _persist_

        Save this object as either python, json or pickle

        Raises RuntimeError if the format is not one of those three.
        """
        if format not in ("python", "json", "pickle"):
            msg = "Unsupported Format: %s" % format
            raise RuntimeError(msg)
        # with blocks make sure the file is closed even if serialising
        # or writing fails
        if format == "python":
            with open(filename, 'w') as handle:
                handle.write(self.pythonise())
        elif format == "json":
            with open(filename, "w") as handle:
                handle.write(self.jsonise())
        else:
            # pickle data is binary
            with open(filename, "wb") as handle:
                pickle.dump(self, handle)
        return

    def unpersist(self, filename, format = None):
        """
        _unpersist_

        Load data from file provided, if format is not specified, guess
        it based on file extension (py, pkl or json)

        Raises RuntimeError if the format is not supported or cannot be
        guessed from the file name.
        """
        if format is None:
            suffixes = {"py": "python", "pkl": "pickle", "json": "json"}
            nameParts = filename.rsplit(".", 1)
            # a name without extension leaves the format undefined and is
            # rejected below
            if len(nameParts) == 2:
                format = suffixes.get(nameParts[1])
        if format not in ("python", "json", "pickle"):
            msg = "Unsupported Format: %s" % format
            raise RuntimeError(msg)
        if format == "pickle":
            # NOTE: unpickling can execute arbitrary code, only load
            # files that come from a trusted source
            with open(filename, 'rb') as handle:
                unpickle = pickle.load(handle)
            # persist pickles the whole tweak: merge the content of its
            # process tree into ours
            self.process.__dict__.update(unpickle.process.__dict__)
        if format == "python":
            modRef = imp.load_source('tempTweak', filename)
            try:
                lister = PSetLister()
                lister(modRef.process)
                for pset in lister.psets:
                    for param in lister.parameters[pset]:
                        self.addParameter(param,
                                          recursiveGetattr(modRef, param))
            finally:
                # do not leave the temporary module registered
                del modRef
                sys.modules.pop('tempTweak', None)
        if format == "json":
            with open(filename, 'r') as handle:
                jsonContent = json.load(handle)
            jsoniser = JSONiser()
            jsoniser.dejson(jsonContent)
            for param, value in jsoniser.parameters.items():
                self.addParameter(param, value)
def makeTweakFromJSON(jsonDictionary):
    """
    _makeTweakFromJSON_

    Build a new PSetTweak from a JSON style dictionary: the dictionary
    is decoded with JSONiser.dejson and every parameter found is added
    to the tweak, which is then returned.
    """
    decoder = JSONiser()
    decoder.dejson(jsonDictionary)
    newTweak = PSetTweak()
    for name in decoder.parameters:
        newTweak.addParameter(name, decoder.parameters[name])
    return newTweak
#!/usr/bin/env python
"""
_WMTweak_
Define extraction of a standard set of WM related PSet parameters
Note: This can be used within the CMSSW environment to act on a
process/config but does not depend on any CMSSW libraries. It needs to stay like this.
"""
import logging
import pickle
from PSetTweaks.PSetTweak import PSetTweak
from PSetTweaks.PSetTweak import parameterIterator, psetIterator
# Names of the parameters to be extracted from an output module.
# "outputCommands" is deliberately not in the list: it is a very large
# block of settings that probably should not be set from here anyway.
_TweakOutputModules = [
    "fileName",
    "logicalFileName",
    "compressionLevel",
    "basketSize",
    "splitLevel",
    "overrideInputFileSplitLevels",
    "maxSize",
    "fastCloning",
    "sortBaskets",
    "dropMetaData",
    "SelectEvents.SelectEvents",
    "dataset.dataTier",
    "dataset.filterName",
    # TODO: support dataset.* here
    ]
_TweakParams = [
    # --- options
    "process.options.fileMode",
    "process.options.wantSummary",
    "process.options.allowUnscheduled",
    "process.options.makeTriggerResults",
    "process.options.Rethrow",
    "process.options.SkipEvent",
    "process.options.FailPath",
    "process.options.FailModule",
    "process.options.IgnoreCompletely",

    # --- configuration metadata
    "process.configurationMetadata.name",
    "process.configurationMetadata.version",
    "process.configurationMetadata.annotation",

    # --- source
    "process.source.maxEvents",
    "process.source.skipEvents",
    "process.source.firstEvent",
    "process.source.firstRun",
    "process.source.firstLuminosityBlock",
    "process.source.numberEventsInRun",
    "process.source.fileNames",
    "process.source.secondaryFileNames",
    "process.source.fileMatchMode",
    "process.source.overrideCatalog",
    "process.source.numberEventsInLuminosityBlock",
    "process.source.firstTime",
    "process.source.timeBetweenEvents",
    "process.source.eventCreationDelay",
    "process.source.needSecondaryFileNames",
    "process.source.parametersMustMatch",
    "process.source.branchesMustMatch",
    "process.source.setRunNumber",
    "process.source.skipBadFiles",
    "process.source.eventsToSkip",
    "process.source.lumisToSkip",
    "process.source.eventsToProcess",
    "process.source.lumisToProcess",
    "process.source.noEventSort",
    "process.source.duplicateCheckMode",
    "process.source.inputCommands",
    "process.source.dropDescendantsOfDroppedBranches",

    # --- maxEvents
    "process.maxEvents.input",
    "process.maxEvents.output",
    # TODO: there are more settings stored as a VPSet, which are very
    # awkward to handle; suggest asking the framework developers to
    # change the interface here

    # --- job report service: nothing listed, everything has shifted to
    #     the default cff

    # --- message logger: nothing listed, everything is in the default cff

    # --- random seeds
    "process.RandomNumberGeneratorService.*.initialSeed",
    ]
try:
    _WMTweakMaskErrorBase = StandardError
except NameError:
    # python3 has no StandardError, Exception is the closest equivalent
    _WMTweakMaskErrorBase = Exception


class WMTweakMaskError(_WMTweakMaskErrorBase):
    """
    _WMTweakMaskError_

    Error carrying a job mask together with a message.

    Attributes:
      mask    -- the mask the error refers to (None if not given)
      message -- description of the problem
    """
    def __init__(self, mask = None, msg = "Cannot set process from job mask"):
        self.mask = mask
        self.message = msg

    def __str__(self):
        """Message followed by the string form of the mask"""
        return "Error: %s \n Mask: %s" % (self.message, str(self.mask))
def lfnGroup(job):
    """
    _lfnGroup_

    Determine the lfnGroup from the job counter and the agent number
    provided in the job baggage, the job counter and agent number
    default both to 0.

    The result is the agent number followed by the job counter divided
    by 1000 (rounded down), zero padded to at least 4 digits: a 5-digit
    string for a single digit agent number and a counter below 10
    million.
    """
    modifier = str(job.get("agentNumber", 0))
    # floor division: "/" would give a float in python3
    thousands = job.get("counter", 0) // 1000
    return modifier + str(thousands).zfill(4)
def hasParameter(pset, param, nopop = False):
    """
    _hasParameter_

    check that pset provided has the attribute chain
    specified.

    Eg if param is pset.attr1.attr2.attr3
    check for pset.attr1.attr2.attr3
    returns True if parameter exists, False if not

    Unless nopop is True, the first element of param is taken to be the
    name of pset itself and is skipped. An attribute whose value is None
    is reported as not existing.
    """
    names = param.split(".")
    if not nopop:
        names.pop(0) # first param is the pset we have the reference to
    current = pset
    for name in names:
        current = getattr(current, name, None)
        # identity test: "==" could call an arbitrary __eq__ of the value
        if current is None:
            return False
    return current is not None
def getParameter(pset, param, nopop = False):
"""
_getParameter_