Commit ad0b4e56 authored by Andrea Sciaba's avatar Andrea Sciaba
Browse files

First version

parent 8c37eacb
#!/usr/bin/env python
"""
Record a set of tweakable parameters from a CMSSW configuration in a
CMSSW-independent Python structure.
"""
import inspect
import pickle
import StringIO
import imp
import sys
# py2.6 compatibility: use the stdlib json module when it is available and
# fall back to the third-party simplejson package otherwise.
try:
    import json
except ImportError:
    import simplejson as json
class PSetHolder(object):
    """
    Dummy PSet object used to construct the Tweak object to mimic
    the config structure.

    Parameters and child PSetHolders are stored as plain instance
    attributes; ``parameters_`` lists the attribute names that are
    parameters.
    """

    def __init__(self, psetName):
        # name of this PSet (one component of the dotted path)
        self.psetName_ = psetName
        # names of the attributes on this holder that are parameters
        self.parameters_ = []
# Helper functions for walking a PSetHolder tree


def childPSets(x):
    """
    Return the attribute values of x that are PSetHolder instances.

    Holders are recognised by class name rather than with isinstance().
    NOTE(review): presumably so that holders built from a re-declared
    PSetHolder class are still matched -- confirm before changing.
    """
    return [value for value in x.__dict__.values()
            if value.__class__.__name__ == "PSetHolder"]


def childParameters(p, x):
    """
    Return the parameter names listed in x.parameters_, each prefixed
    with the dotted path p.
    """
    return ["%s.%s" % (p, i) for i in x.parameters_]


def recursiveGetattr(obj, attr):
    """
    Resolve the dot-delimited attribute path attr starting from obj.

    Raises AttributeError if any component of the path is missing.
    """
    # plain loop instead of reduce(), which is not a builtin on Python 3
    for name in attr.split("."):
        obj = getattr(obj, name)
    return obj
def parameterIterator(obj):
    """
    Util to iterate through the parameters in a PSetHolder.

    Yields the value of every attribute named in obj.parameters_, in
    the order the names are listed.
    """
    # Read the names straight from parameters_: the childParameters helper
    # takes a path prefix as well and returns dotted paths, which cannot be
    # passed to getattr on the holder itself.
    for name in obj.parameters_:
        yield getattr(obj, name)
def psetIterator(obj):
    """
    Util to iterate through the child psets in a PSetHolder.

    Yields each direct child of obj that childPSets() reports.
    """
    for child in childPSets(obj):
        yield child
class PSetLister:
    """
    Operator to decompose the PSet structure into a more
    sequence-like layout.

    After calling an instance on a root PSetHolder:
      * psets      -- dotted paths of every PSet visited, parents first
      * parameters -- maps each PSet path to the dotted paths of its
                      parameters
    """

    def __init__(self):
        self.psets = []
        self.parameters = {}
        # names of the PSets between the root and the one being visited
        self.queue = []

    def __call__(self, pset):
        """
        Recursively traverse all parameters in this and all child PSets.
        """
        # NOTE(review): the queue push/pop and the psets bookkeeping are
        # reconstructed; without them queue and psets are never populated
        # and every caller that iterates lister.psets sees nothing.
        self.queue.append(pset.psetName_)
        psetPath = ".".join(self.queue)
        self.psets.append(psetPath)
        self.parameters[psetPath] = childParameters(psetPath, pset)
        # explicit loop: map() is lazy on Python 3 and would never recurse
        for child in childPSets(pset):
            self(child)
        self.queue.pop()
class JSONiser:
    """
    Util class to build a json dictionary structure from the PSet tree
    and also recover the parameters from a json structure.

    Attributes:
      json       -- nested dictionary built by calling the instance
      queue      -- names of the dictionaries currently being descended
                    by dejson()
      parameters -- flat {dotted path: value} mapping filled by dejson()
    """

    def __init__(self):
        self.json = {}
        self.queue = []
        self.parameters = {}

    def __call__(self, pset, parent = None):
        """
        Operate on pset and its substructure to build a json dictionary.

        Each PSet becomes a dictionary keyed by its name, holding its
        parameter values, a 'parameters_' list of their names and one
        nested dictionary per child PSet.
        """
        if parent is None:
            parent = self.json
        thisPSet = parent.get(pset.psetName_, None)
        if thisPSet is None:
            thisPSet = {}
            parent[pset.psetName_] = thisPSet
        for param in pset.parameters_:
            thisPSet[param] = getattr(pset, param)
        thisPSet['parameters_'] = pset.parameters_
        for child in childPSets(pset):
            self(child, thisPSet)

    def dejson(self, dictionary):
        """
        Flatten the json structure into self.parameters.

        Every name in a dictionary's 'parameters_' list is stored under
        its dotted path; nested dictionaries are descended recursively.
        """
        params = dictionary.get('parameters_', [])
        prefix = ".".join(self.queue)
        for param in params:
            self.parameters["%s.%s" % (prefix, param)] = dictionary[param]
        for key, value in dictionary.items():
            if isinstance(value, dict):
                # NOTE(review): recursion reconstructed; the visible branch
                # has no body, so nested PSets were never reached.
                self.queue.append(key)
                self.dejson(value)
                self.queue.pop()
class PSetTweak:
    """
    Template object listing the parameters to be edited.

    Also provides serialisation functionality and defines the
    process + tweak operator to apply the tweaks to a process.

    NOTE(review): several statements in this class are reconstructed
    where the traversal, write or load step was not visible; each one
    is marked below and should be confirmed against the upstream file.
    """

    def __init__(self):
        # root of the PSetHolder tree; every parameter path starts here
        self.process = PSetHolder("process")

    def addParameter(self, attrName, value):
        """
        Add an attribute as process.pset1.pset2.param = value.

        Value should be the appropriate python type. Intermediate PSets
        are created on demand. Raises RuntimeError if attrName does not
        start with "process".
        """
        names = attrName.split(".")
        if names[0] != "process":
            # same error as getParameter rather than an AttributeError
            # from calling setattr on None further down
            msg = "Invalid Parameter Name: %s\n" % attrName
            msg += "Parameter must start with process"
            raise RuntimeError(msg)

        currentPSet = None
        lastIndex = len(names) - 1
        for index, name in enumerate(names):
            if name == "process":
                currentPSet = self.process
            elif index < lastIndex:
                if not hasattr(currentPSet, name):
                    setattr(currentPSet, name, PSetHolder(name))
                currentPSet = getattr(currentPSet, name)
            else:
                setattr(currentPSet, name, value)
                # record the name once so that re-setting a parameter does
                # not make it appear twice when iterating
                if name not in currentPSet.parameters_:
                    currentPSet.parameters_.append(name)

    def getParameter(self, paramName):
        """
        Get the value of the parameter with the name given, of the
        form process.module...

        Raises RuntimeError if paramName does not start with "process".
        """
        if not paramName.startswith("process"):
            msg = "Invalid Parameter Name: %s\n" % paramName
            msg += "Parameter must start with process"
            raise RuntimeError(msg)
        return recursiveGetattr(self, paramName)

    def __iter__(self):
        """
        Loop over all parameters in the tweak, yielding the parameter
        name as a . delimited path and the value.
        """
        lister = PSetLister()
        lister(self.process)  # NOTE(review): reconstructed traversal call
        for pset in lister.psets:
            for param in lister.parameters[pset]:
                yield param, self.getParameter(param)

    def psets(self):
        """
        Generator function to yield the dotted paths of the PSets in
        the tweak.
        """
        lister = PSetLister()
        lister(self.process)  # NOTE(review): reconstructed traversal call
        for pset in lister.psets:
            yield pset

    def __str__(self):
        """string repr for debugging etc"""
        return "".join("%s = %s\n" % (name, value) for name, value in self)

    def setattrCalls(self, psetPath):
        """
        Generate a setattr call for each PSet along the path given.

        Used for generating python format. Returns a dictionary mapping
        each dotted PSet path below the root to the statement that
        creates that PSetHolder on its parent.
        """
        result = {}
        current = None
        for pset in psetPath.split("."):
            last = current
            if current is None:
                current = pset
            else:
                current += ".%s" % pset
            if last is not None:
                result[current] = "setattr(%s, \"%s\", PSetHolder(\"%s\"))" % (
                    last, pset, pset)
        return result

    def pythonise(self):
        """
        Return this object as python format: the source of PSetHolder,
        followed by statements rebuilding the PSet tree and setting
        every parameter.
        """
        result = "".join(inspect.getsourcelines(PSetHolder)[0])
        result += "\n\n"
        result += "# define PSet Structure\n"
        result += "process = PSetHolder(\"process\")\n"

        # NOTE(review): collecting the per-PSet calls is reconstructed
        setattrCalls = {}
        for pset in self.psets():
            setattrCalls.update(self.setattrCalls(pset))
        # sorted order puts every parent path before its children
        for call in sorted(setattrCalls):
            if call == "process":
                continue
            result += "%s\n" % setattrCalls[call]

        result += "# set parameters\n"
        for param, value in self:
            psetName, paramName = param.rsplit(".", 1)
            # repr() keeps strings containing quotes (and Python 2 unicode
            # values) valid as python source
            result += "setattr(%s, \"%s\", %r)\n" % (
                psetName, paramName, value)
            result += "%s.parameters_.append(\"%s\")\n" % (psetName, paramName)
        return result

    def jsonise(self):
        """
        Return the json format of this tweak as a string.
        """
        return json.dumps(self.jsondictionary())

    def jsondictionary(self):
        """
        Return the json layout dictionary, rather than a string of it.
        """
        jsoniser = JSONiser()
        jsoniser(self.process)  # NOTE(review): reconstructed traversal call
        return jsoniser.json

    def persist(self, filename, format = "python"):
        """
        Save this object as either python, json or pickle.

        Raises RuntimeError for any other format.
        """
        if format not in ("python", "json", "pickle"):
            msg = "Unsupported Format: %s" % format
            raise RuntimeError(msg)

        # NOTE(review): the write calls are reconstructed; the visible code
        # opened the file but never wrote to it or closed it.
        if format == "python":
            with open(filename, 'w') as handle:
                handle.write(self.pythonise())
        if format == "json":
            with open(filename, "w") as handle:
                handle.write(self.jsonise())
        if format == "pickle":
            # binary mode is required for pickle on Python 3
            with open(filename, "wb") as handle:
                pickle.dump(self, handle)

    def unpersist(self, filename, format = None):
        """
        Load data from the file provided. If format is not specified,
        guess it based on the file extension (.py, .pkl or .json).

        Raises RuntimeError if the format is unsupported or cannot be
        guessed.
        """
        if format is None:
            # [-1] so that a name without any "." falls through to the
            # unsupported-format error instead of raising IndexError
            fileSuffix = filename.rsplit(".", 1)[-1]
            if fileSuffix == "py":
                format = "python"
            if fileSuffix == "pkl":
                format = "pickle"
            if fileSuffix == "json":
                format = "json"

        if format not in ("python", "json", "pickle"):
            msg = "Unsupported Format: %s" % format
            raise RuntimeError(msg)

        if format == "pickle":
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only load files from a trusted source.
            with open(filename, 'rb') as handle:
                unpickle = pickle.load(handle)
            # NOTE(review): reconstructed -- adopt the pickled tweak's tree
            self.process = unpickle.process

        if format == "python":
            # SECURITY: this executes the file as python source
            modRef = imp.load_source('tempTweak', filename)
            lister = PSetLister()
            lister(modRef.process)  # NOTE(review): reconstructed traversal
            for pset in lister.psets:
                for param in lister.parameters[pset]:
                    self.addParameter(param, recursiveGetattr(modRef, param))
            del modRef, sys.modules['tempTweak']

        if format == "json":
            with open(filename, 'r') as handle:
                jsonContent = handle.read()
            jsoniser = JSONiser()
            jsoniser.dejson(json.loads(jsonContent))
            for param, value in jsoniser.parameters.items():
                self.addParameter(param, value)
def makeTweakFromJSON(jsonDictionary):
    """
    Make a tweak instance and populate it from a dictionary JSON
    structure.

    Returns a new PSetTweak holding every parameter that
    JSONiser.dejson() finds in jsonDictionary.
    """
    jsoniser = JSONiser()
    # NOTE(review): reconstructed -- the argument was otherwise unused and
    # the returned tweak was always empty.
    jsoniser.dejson(jsonDictionary)
    tweak = PSetTweak()
    for param, value in jsoniser.parameters.items():
        tweak.addParameter(param, value)
    return tweak
#!/usr/bin/env python
"""
Define extraction of a standard set of WM-related PSet parameters.

Note: This can be used within the CMSSW environment to act on a
process/config, but it does not depend on any CMSSW libraries and must
stay that way.
"""
import logging
import pickle
from PSetTweaks.PSetTweak import PSetTweak
from PSetTweaks.PSetTweak import parameterIterator, psetIterator
# Parameters to be extracted from an output module.
# NOTE(review): no list entries and no closing bracket are visible for
# either list below -- restore them from the upstream file.
_TweakOutputModules = [
#"outputCommands", # deliberately left out: a very large list that we probably should not be setting anyway
# TODO: support dataset.* here
_TweakParams = [
# options
# config metadata
# source
# maxevents
# TODO: there are more settings stored as a VPSet which are awkward to
# handle; suggest asking the framework team to change the interface here
# job report service
# Everything has shifted to the default cff
# message logger
# Everything is in the default cff
# random seeds
# StandardError only exists on Python 2; fall back to Exception elsewhere
# so that the class hierarchy is unchanged where StandardError is available.
try:
    _WMTweakErrorBase = StandardError
except NameError:
    _WMTweakErrorBase = Exception


class WMTweakMaskError(_WMTweakErrorBase):
    """
    Error raised when a process cannot be set up from a job mask.

    Attributes:
      mask    -- the offending mask (may be None)
      message -- human readable description of the problem
    """

    def __init__(self, mask = None, msg = "Cannot set process from job mask"):
        # initialise the base class so that args is populated
        super(WMTweakMaskError, self).__init__(msg)
        self.mask = mask
        self.message = msg

    def __str__(self):
        return "Error: %s \n Mask: %s" % (self.message, str(self.mask))
def lfnGroup(job):
    """
    Determine the lfnGroup from the job counter and the agent number
    provided in the job baggage; the job counter and agent number
    both default to 0.

    job must provide a dict-style get(). The result is the agent number
    followed by (counter // 1000) zero-padded to four digits, i.e. a
    5-digit string for a single-digit agent number.
    """
    modifier = str(job.get("agentNumber", 0))
    # floor division: "/" is true division on Python 3 and would give
    # values such as "02.5" here
    group = str(job.get("counter", 0) // 1000).zfill(4)
    return modifier + group
def hasParameter(pset, param, nopop = False):
    """
    Check that the pset provided has the attribute chain given.

    Eg if param is pset.attr1.attr2.attr3
    check for pset.attr1.attr2.attr3

    By default the first component of param is taken to name pset
    itself and is dropped; pass nopop=True when param is already
    relative to pset.

    Returns True if the parameter exists, False if not. An attribute
    whose value is None counts as missing.
    """
    names = param.split(".")
    if not nopop:
        names.pop(0)  # first param is the pset we have the reference to
    current = pset
    for name in names:
        current = getattr(current, name, None)
        # identity test: "== None" would invoke a custom __eq__ if the
        # attribute's type defines one
        if current is None:
            return False
    return current is not None
def getParameter(pset, param, nopop = False):