Commit d0ca09a1 authored by Marco Clemencic's avatar Marco Clemencic
Browse files

Introduce new configuration Python backend

parent f2de087d
......@@ -178,6 +178,13 @@ test_public_headers_build:
paths:
- .ccache
test-python3:
image: python:3-slim
stage: test
script:
- pip install nose coverage
- PYTHONPATH=GaudiConfiguration/python nosetests -v --with-coverage --cover-package=GaudiConfig2 --cover-min-percentage=100 GaudiConfiguration/tests/nose
# see https://gitlab.cern.ch/gitlabci-examples/deploy_eos for the details
# of the configuration
deploy-doxygen:
......
......@@ -156,7 +156,64 @@ class BootstrapHelper(object):
return (a, b, c)
_bootstrap = None
def getAllOpts(explicit_defaults=False):
    '''
    Collect the options set through old-style configurables and through
    GaudiConfig2 into a single dictionary.

    The returned dictionary maps "<configurable name>.<property>" to the
    string representation of the property value.

    If explicit_defaults is true, the default properties of the old-style
    configurables are included too (explicitly set values are processed
    after the defaults, so they take precedence), and the flag is forwarded
    to GaudiConfig2.all_options.

    If the same option has different values in the two configuration styles,
    the conflicts are logged and the process is terminated with
    sys.exit(10).
    '''
    from itertools import chain
    # old conf
    from GaudiKernel.Proxy.Configurable import Configurable, getNeededConfigurables
    old_opts = {}
    # some algorithms may be generated when we call "getValuedProperties"
    # so we need a few iterations before we get the full list
    # (see GaudiConfig.ControlFlow)
    needed_conf = []
    count = 0
    new_count = -1
    # iterate until the number of needed configurables stops changing
    while count != new_count:
        count = new_count
        needed_conf = getNeededConfigurables()
        new_count = len(needed_conf)
        for n in needed_conf:
            c = Configurable.allConfigurables[n]
            if hasattr(c, 'getValuedProperties'):
                # called only for its side effects, the result is discarded
                c.getValuedProperties()
    for n in needed_conf:
        c = Configurable.allConfigurables[n]
        # defaults first, so that explicitly set values overwrite them below
        items = (chain(c.getDefaultProperties().items(),
                       c.getValuedProperties().items())
                 if explicit_defaults else c.getValuedProperties().items())
        for p, v in items:
            # Note: AthenaCommon.Configurable does not have Configurable.PropertyReference
            if hasattr(Configurable, "PropertyReference") and type(
                    v) == Configurable.PropertyReference:
                # this is done in "getFullName", but the exception is ignored,
                # so we do it again to get it
                v = v.__resolve__()
            if isinstance(v, str):
                # properly escape quotes in the string (see gaudi/Gaudi#78)
                v = '"%s"' % v.replace('"', '\\"')
            elif sys.version_info < (3, ) and isinstance(v, long):
                v = '%d' % v  # prevent pending 'L'
            elif hasattr(v, '__opt_value__'):
                v = v.__opt_value__()
            old_opts['.'.join((n, p))] = str(v)
    # new style configuration
    import GaudiConfig2
    opts = GaudiConfig2.all_options(explicit_defaults)
    # options present in both styles are accepted only if the values agree
    conflicts = [
        n for n in set(opts).intersection(old_opts) if opts[n] != old_opts[n]
    ]
    if conflicts:
        conflicts.sort()
        log.error('Some properties are set in old and new style configuration')
        log.warning('name: old -> new')
        for n in conflicts:
            log.warning('%s: %s -> %s', n, old_opts[n], opts[n])
        sys.exit(10)
    # at this point any key present in both dictionaries has the same value
    opts.update(old_opts)
    return opts
def toOpt(value):
......@@ -182,6 +239,36 @@ def toOpt(value):
return repr(value)
def parseOpt(s):
    '''
    Convert the string form of an option into the matching Python value.

    A plain "eval" is not enough: the Gaudi string parser and the Python one
    disagree on quoting, so double-quoted strings are unwrapped by hand and
    only the remaining values are evaluated.

    >>> print(parseOpt('123'))
    123
    >>> print(parseOpt('"some\\n\\\\"text\\\\""'))
    some
    "text"
    >>> print(parseOpt(''))
    <BLANKLINE>

    (see gaudi/Gaudi#78)
    '''
    import re
    # empty strings are handed back untouched
    if not s:
        return s
    # FIXME: manual handling is needed because we cannot use repr for strings
    #        (see gaudi/Gaudi#78)
    quoted_string = re.compile(r'^"(.*)"$', re.DOTALL)
    m = quoted_string.match(s)
    if m is None:
        # NOTE(review): the text is evaluated as a Python expression, so it
        # must come from a trusted configuration
        return eval(s)
    # strip the enclosing quotes and undo the escaping of the inner ones
    return m.group(1).replace('\\"', '"')
class gaudimain(object):
def __init__(self):
from Configurables import ApplicationMgr
......@@ -231,8 +318,13 @@ class gaudimain(object):
def generatePyOutput(self, all=False):
from pprint import pformat
conf_dict = Configuration.configurationDict(all)
formatted = pformat(conf_dict)
from collections import defaultdict
optDict = defaultdict(dict)
allOpts = getAllOpts(all)
for key in allOpts:
c, p = key.rsplit('.', 1)
optDict[c][p] = parseOpt(allOpts[key])
formatted = pformat(dict(optDict))
# Python 2 compatibility
if six.PY2:
return formatted
......@@ -242,16 +334,10 @@ class gaudimain(object):
return re.sub(r'"\n +"', '', formatted, flags=re.MULTILINE)
def generateOptsOutput(self, all=False):
conf_dict = Configuration.configurationDict(all)
out = []
names = list(conf_dict.keys())
names.sort()
for n in names:
props = list(conf_dict[n].keys())
props.sort()
for p in props:
out.append('%s.%s = %s;' % (n, p, toOpt(conf_dict[n][p])))
return "\n".join(out)
opts = getAllOpts(all)
keys = sorted(opts)
return '\n'.join(
'{} = {};'.format(key, toOpt(parseOpt(opts[key]))) for key in keys)
def _writepickle(self, filename):
# --- Lets take the first file input file as the name of the pickle file
......@@ -270,7 +356,6 @@ class gaudimain(object):
if not all:
msg += ' (different from default)'
log.info(msg)
conf_dict = Configuration.configurationDict(all)
if old_format:
print(self.generateOptsOutput(all))
else:
......@@ -326,34 +411,11 @@ class gaudimain(object):
def expandvars(data):
return data
from GaudiKernel.Proxy.Configurable import Configurable, getNeededConfigurables
from GaudiKernel.Proxy.Configurable import Configurable
self.log.debug('runSerial: apply options')
conf_dict = {'ApplicationMgr.JobOptionsType': '"NONE"'}
# FIXME: this is to make sure special properties are correctly
# expanded before we fill conf_dict
for c in list(Configurable.allConfigurables.values()):
if hasattr(c, 'getValuedProperties'):
c.getValuedProperties()
for n in getNeededConfigurables():
c = Configurable.allConfigurables[n]
for p, v in c.getValuedProperties().items():
v = expandvars(v)
# Note: AthenaCommon.Configurable does not have Configurable.PropertyReference
if hasattr(Configurable, "PropertyReference") and type(
v) == Configurable.PropertyReference:
# this is done in "getFullName", but the exception is ignored,
# so we do it again to get it
v = v.__resolve__()
if type(v) == str:
# properly escape quotes in the string
v = '"%s"' % v.replace('"', '\\"')
elif sys.version_info < (
3, ) and type(v) == long: # Python 3 compatibility
v = '%d' % v # prevent pending 'L'
conf_dict['{}.{}'.format(n, p)] = str(v)
conf_dict = expandvars(getAllOpts())
conf_dict['ApplicationMgr.JobOptionsType'] = '"NONE"'
if self.printsequence:
conf_dict['ApplicationMgr.PrintAlgsSequence'] = 'true'
......
......@@ -6,21 +6,21 @@ import sys
from tempfile import mkstemp
def getArgsWithoutoProfilerInfo(args):
def getArgsWithoutProfilerInfo(args):
"""
Remove from the arguments the presence of the profiler and its output in
order to relaunch the script w/o infinite loops.
>>> getArgsWithoutoProfilerInfo(['--profilerName', 'igprof', 'myopts.py'])
>>> getArgsWithoutProfilerInfo(['--profilerName', 'igprof', 'myopts.py'])
['myopts.py']
>>> getArgsWithoutoProfilerInfo(['--profilerName=igprof', 'myopts.py'])
>>> getArgsWithoutProfilerInfo(['--profilerName=igprof', 'myopts.py'])
['myopts.py']
>>> getArgsWithoutoProfilerInfo(['--profilerName', 'igprof', '--profilerExtraOptions', 'a b c', 'myopts.py'])
>>> getArgsWithoutProfilerInfo(['--profilerName', 'igprof', '--profilerExtraOptions', 'a b c', 'myopts.py'])
['myopts.py']
>>> getArgsWithoutoProfilerInfo(['--profilerName', 'igprof', '--options', 'a b c', 'myopts.py'])
>>> getArgsWithoutProfilerInfo(['--profilerName', 'igprof', '--options', 'a b c', 'myopts.py'])
['--options', 'a b c', 'myopts.py']
"""
newargs = []
......@@ -131,7 +131,7 @@ if __name__ == "__main__":
os.environ['LC_ALL'] = 'C'
from optparse import OptionParser
parser = OptionParser(usage="%prog [options] <opts_file> ...")
parser = OptionParser(usage="%prog [options] <opts_file|function_id> ...")
parser.add_option(
"-n",
"--dry-run",
......@@ -389,7 +389,7 @@ if __name__ == "__main__":
profilerOutput = opts.profilerOutput or (profilerName + ".output")
# To restart the application removing the igprof option and prepending the string
args = getArgsWithoutoProfilerInfo(sys.argv)
args = getArgsWithoutProfilerInfo(sys.argv)
igprofPerfOptions = "-d -pp -z -o igprof.pp.gz".split()
......@@ -488,10 +488,20 @@ if __name__ == "__main__":
from Gaudi.Main import gaudimain
c = gaudimain()
from GaudiConfig2 import CALLABLE_FORMAT, mergeConfigs, invokeConfig, Configurable
callables = []
opt_files = []
for arg in args:
if CALLABLE_FORMAT.match(arg):
callables.append(arg)
else:
opt_files.append(arg)
# Prepare the "configuration script" to parse (like this it is easier than
# having a list with files and python commands, with an if statements that
# decides to do importOptions or exec)
options = ["importOptions(%r)" % f for f in args]
options = ["importOptions(%r)" % f for f in opt_files]
# The option lines are inserted into the list of commands using their
# position on the command line
optlines = list(opts.options)
......@@ -552,6 +562,12 @@ if __name__ == "__main__":
os.remove(os.environ['GAUDI_TEMP_OPTS_FILE'])
opts.use_temp_opts = False
# Run callables
config = mergeConfigs(*[invokeConfig(f) for f in callables])
# make configurations available to getAllOpts
# FIXME the whole machinery has to be inverted, to avoid relying on globals
Configurable.instances = mergeConfigs(Configurable.instances, config)
if opts.verbose and not opts.use_temp_opts:
c.printconfig(opts.old_opts, opts.all_opts)
if opts.output:
......
......@@ -41,10 +41,10 @@ try:
print("==========================================")
print("= cmd:", " ".join(cmd))
out, err = proc.communicate()
print(out)
print(out.decode('utf-8'))
if err:
print("=== stderr: ===")
print(err)
print(err.decode('utf-8'))
expected = eval(open(outname + ".1.py").read())
# parse the option file, export old options, parse again
......@@ -60,10 +60,10 @@ try:
print("==========================================")
print("= cmd:", " ".join(cmd))
out, err = proc.communicate()
print(out)
print(out.decode('utf-8'))
if err:
print("=== stderr: ===")
print(err)
print(err.decode('utf-8'))
cmd = [
"python",
......@@ -77,10 +77,10 @@ try:
print("==========================================")
print("= cmd:", " ".join(cmd))
out, err = proc.communicate()
print(out)
print(out.decode('utf-8'))
if err:
print("=== stderr: ===")
print(err)
print(err.decode('utf-8'))
result = eval(open(outname + ".2.py").read())
if result != expected:
......
gaudi_subdir(GaudiConfiguration)
gaudi_install_python_modules()
# Ideally I would use '--cover-min-percentage=100', but the version of nose we
# have is a bit old
gaudi_add_test(nose
COMMAND nosetests -v --with-doctest --with-coverage --cover-package=GaudiConfig2
${CMAKE_CURRENT_SOURCE_DIR}/tests/nose
PASSREGEX "TOTAL .* 100%"
FAILREGEX "FAILED")
# GaudiConfig2 documentation {#GaudiConfig2-readme}
## Rationale
After many years of experience with Gaudi Python based configuration, and with
the time window of the CERN LHC Second Long Shutdown (LS2), it's time to
review the implementation of the Python and C++ code we used as backbone for
Python based configuration.
What is required from a new implementation is:
- precise and punctual validation of configuration parameters (properties)
- extensibility (allow user defined semantics for user defined property types
without the need of changes in core C++ code)
- intuitive behaviour, in particular no implicit _named_ singletons
(AKA *global configurables registry*)
## Design principles
### Information flow
Properties exist in two domains (C++ and Python) which should interoperate
by exchanging information.
The following points describe the main steps of a property lifetime:
- a property is declared in C++ as a data member of a component class using
`Gaudi::Property<T>`
- a C++ tool (`genconf`) scans the component class looking for properties and
generates files with details about each property of each component class found
- Python facade classes to component classes are generated from the details
collected by the C++ tool
- users define the configuration of a Gaudi application creating instances of
such Python facade classes and assigning values to the properties
- the Gaudi application bootstrap executes user Python configuration code to
collect all user set properties
- all user set properties are passed as strings to a dedicated service inside
the Gaudi application
- the required C++ components are instantiated by the framework and initialized
setting the values of the properties from their string representation
For the exchange of information to work, these operations must be correctly
defined for a property:
- conversion of the C++ default value to its Python version
- validation of user input
- conversion from a Python value object to a string that can be parsed to the
corresponding C++ value
For improved usability, there is no need that the internal storage of the Python
value matches the string representation to be passed to C++ for parsing. Take
for example the case of a string property meant to hold _type/name_ of a Gaudi
`Service` (or `AlgTool`) instance; we can easily envisage the internal storage
to be the instance of the Configurable class matching the requested `Service`
type, but the Python property to be assigned from a _type/name_ string or from
the configurable instance (we can extend the type checking to ensure that the
interface implemented by the service matches the requirements from C++), and
only when converting to string during the export to C++ it is mapped to the
correct _type/name_ string.
Implementing the semantics in terms of
- what the user uses to set a value
- what the user sees when getting the value
- what C++ gets as string representation of the value
- what to do with values when merging configurations
allows for the creation of very powerful and easy to use interfaces to user options.
### Configurables database
Another aspect to consider is how users access and instantiate the Python
facade classes.
C++ components come from different libraries in different subdirectories of all
projects, and it's often difficult to map a component name to the library containing
it.
To simplify user life we can use a database of components that records the
component fully qualified name, the type of component and its properties (with
their types and semantics).
For a more _Pythonic_ access to the facade classes, the C++ namespaces can be
mapped to Python modules, via _fake_ modules that hide the lookup in the
database and create the class objects on demand. Of course, helper functions
to get a facade class via the fully qualified C++ name can be provided too.
The database can be extended to record all kind of information we want to access
from Python. For example, we record the list of interfaces implemented by the
components, so that they can be used in the type checking of property values.
### Stacking configuration files
A typical use case for the configuration of a Gaudi application is to start from
a predefined configuration and change some parameters (for example the
verbosity level, or the number of events to process).
Instances of facade classes are, by default, not participating in the
configuration of the application, and to enable them they must be given a name,
which is required by Gaudi framework to instantiate the corresponding C++
classes. Once a name is used by an instance, it cannot be used by another, but
there must be a way to get the instance with a given name (it is also useful
to allow automatic creation of a new instance if the name is not used yet,
similar to the behaviour of Python's `dict.get` method).
Retrieving a named instance is a way of overriding its settings. Another
way would be to keep unnamed instances in shared Python modules, to be then
given a name to enter the actual final configuration.
Means to merge unnamed or named instances can be also provided.
To be noted that some property semantics may require an instance to have a
name, for example to add an algorithm to the list of executed algorithms.
# recursively install wrapper modules for Configurables
from GaudiConfig2._db import ConfigurablesDB
Configurables = ConfigurablesDB(__name__ + '.Configurables')
del ConfigurablesDB # no need to use this class after this point
from GaudiConfig2._configurables import (Property, Configurable, all_options,
useGlobalInstances)
import re
from sys import version_info
# Python 3 has no "basestring": alias it to "str" so that the string checks
# in this module can use the same name on both Python versions
if version_info >= (3, ):  # pragma no cover
    basestring = str
# Regular expression to check if any of the options is a Python callable,
# in the form of a string like `package.sub_package.module:callable`
# (the dotted module path is captured in the group "module" and the name
# after the colon in the group "callable")
CALLABLE_FORMAT = re.compile(
    r'^(?P<module>[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*):(?P<callable>[a-zA-Z_][a-zA-Z0-9_]*)$'
)
def _makeConfigDict(iterable):
    '''
    Return the configuration as a mapping from name to configurable.

    A mapping is returned as it is (not copied), any other iterable is
    converted to a dictionary keyed by the "name" attribute of its items.
    '''
    try:  # pragma no cover
        from collections.abc import Mapping
    except ImportError:  # pragma no cover
        from collections import Mapping
    if isinstance(iterable, Mapping):
        return iterable
    return dict((item.name, item) for item in iterable)
def mergeConfigs(*configs):
    '''
    Combine several configurations into one dictionary.

    Each argument can be a dictionary ({'name': Configurable('name'), ...})
    or a list ([Configurable('name'), ...]).  When a name appears more than
    once, the later instance is merged into the first one found, through its
    "merge" method.

    **warning** the configurable instances passed are not cloned during the
    merging, so the arguments to this function cannot be used afterwards
    '''
    merged = {}
    for entry in configs:
        for name, conf in _makeConfigDict(entry).items():
            if name not in merged:
                merged[name] = conf
            else:
                merged[name].merge(conf)
    return merged
def invokeConfig(func, *args, **kwargs):
    '''
    Call a configuration function and return its result as a dictionary.

    func can be a callable or a string in the form accepted by
    CALLABLE_FORMAT (e.g. `package.module:callable`), in which case the
    module is imported and the callable is looked up in it.  The extra
    arguments are forwarded to the callable.

    Raises ValueError for a string not matching CALLABLE_FORMAT and TypeError
    if func is neither a callable nor a string.
    '''
    from importlib import import_module
    if not callable(func):
        if not isinstance(func, basestring):
            raise TypeError(
                'expected either a callable or a string as first argument')
        m = CALLABLE_FORMAT.match(func)
        if not m:
            raise ValueError('invalid callable id %r' % func)
        module = import_module(m.group('module'))
        func = getattr(module, m.group('callable'))
    return _makeConfigDict(func(*args, **kwargs))
# Classes and functions for handling Configurables
from __future__ import absolute_import
import sys
# Flag telling whether the global instances database is in use.
# It is initialised to False and changed only via useGlobalInstances().
_GLOBAL_INSTANCES = False


def useGlobalInstances(enable):
    '''
    Enable or disable the global instances database.

    The flag is initialised to False in this module, i.e. the database is
    disabled until this function is called with a true value.
    NOTE(review): the previous docstring stated "enabled by default"; confirm
    that nothing else enables it at import time.

    Calling the function with the current setting is a no-op.  The database
    can be disabled only while Configurable.instances is empty.

    Raises AssertionError if asked to disable the database while it still
    contains instances.
    '''
    global _GLOBAL_INSTANCES
    if enable == _GLOBAL_INSTANCES:
        return
    if not enable and Configurable.instances:
        # explicit raise instead of "assert", so that the check is not
        # stripped when Python runs with -O (same exception type as before)
        raise AssertionError(
            'Configurable instances DB not empty, cannot be disabled')
    _GLOBAL_INSTANCES = enable
class Property(object):
    '''
    Descriptor implementing the validation of Configurable properties.

    Conversions and checks are delegated to a "semantics" object, while the
    values are kept in the "_properties" dictionary of the instance the
    descriptor is accessed through.
    '''

    def __init__(self, cpp_type, default, doc='undocumented', semantics=None):
        # the semantics is looked up from the explicit "semantics" argument
        # if given, otherwise from the C++ type
        from .semantics import getSemanticsFor
        self.semantics = getSemanticsFor(semantics or cpp_type)
        self.default = default
        self.__doc__ = doc

    @property
    def cpp_type(self):
        # C++ type, as known by the semantics object
        return self.semantics.cpp_type

    @property
    def name(self):
        # property name, stored in the semantics object (see __set_name__)
        return self.semantics.name

    def __get__(self, instance, owner):
        key = self.name
        stored = instance._properties
        # semantics providing a "default" method get their default value
        # stored in the instance the first time the property is read
        if key not in stored and hasattr(self.semantics, 'default'):
            stored[key] = self.semantics.default(self.default)
        return self.semantics.load(stored.get(key, self.default))

    def __set__(self, instance, value):
        converted = self.semantics.store(value)
        instance._properties[self.name] = converted

    def __delete__(self, instance):
        del instance._properties[self.name]

    def __set_name__(self, owner, name):
        self.semantics.name = name

    def __is_set__(self, instance, owner):
        '''
        Tell if the property has a value stored in the instance that the
        semantics considers as set.
        '''
        try:
            return self.semantics.is_set(instance._properties[self.name])
        except KeyError:
            return False

    def __opt_value__(self, instance, owner):
        '''
        Return the stored value (or the default) converted by the semantics
        with "opt_value".
        '''
        current = instance._properties.get(self.name, self.default)
        return self.semantics.opt_value(current)

    def __merge__(self, instance, owner, value):
        '''
        Return the "merge" (according to the semantics) of the value in this
        property and the incoming value; if the property is not set, the
        incoming value is returned as it is.
        '''
        if self.__is_set__(instance, owner):
            return self.semantics.merge(self.__get__(instance, owner), value)
        return value
class ConfigurableMeta(type):
'''
Metaclass for Configurables.
'''
def __new__(cls, name, bases, namespace, **kwds):
props = {
key: namespace[key]
for key in namespace if isinstance(namespace[key], Property)
}
if props:
doc = namespace.get('__doc__', '').rstrip()
doc += '\n\nProperties\n----------\n'
doc += '\n'.join([
'- {name}: {p.cpp_type} ({p.default!r})\n {p.__doc__}\n'.
format(name=n, p=props[n]) for n in props
])
namespace['__doc__'] = doc