From d5b1c6d63f15b115bba31f08d68146dca3ad5f00 Mon Sep 17 00:00:00 2001
From: Frank Sauerburger <f.sauerburger@cern.ch>
Date: Tue, 11 Jun 2019 00:18:48 +0200
Subject: [PATCH] Define hist, variable, process and blinding

---
 nnfwtbn/__init__.py |  5 +++
 nnfwtbn/plot.py     | 60 ++++++++++++++++++++++++++++++++++
 nnfwtbn/process.py  | 46 ++++++++++++++++++++++++++
 nnfwtbn/variable.py | 80 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 191 insertions(+)
 create mode 100644 nnfwtbn/plot.py
 create mode 100644 nnfwtbn/process.py
 create mode 100644 nnfwtbn/variable.py

diff --git a/nnfwtbn/__init__.py b/nnfwtbn/__init__.py
index 6c8e6b9..c1f4e23 100644
--- a/nnfwtbn/__init__.py
+++ b/nnfwtbn/__init__.py
@@ -1 +1,6 @@
 __version__ = "0.0.0"
+
+from .variable import Variable, RangeBlinding
+from .process import Process
+from .cut import Cut
+from .plot import HistogramFactory, hist
diff --git a/nnfwtbn/plot.py b/nnfwtbn/plot.py
new file mode 100644
index 0000000..32f3663
--- /dev/null
+++ b/nnfwtbn/plot.py
@@ -0,0 +1,60 @@
+
+class HistogramFactory:
+    """
+    Short-cut to create multiple histograms with the same set of processes or
+    in the same region.
+    """
+
+    def __init__(self, *args, **kwds):
+        """
+        Accepts any number of positional and keyword arguments. They are stored
+        internally and used as default values for hist(). See __call__().
+        """
+
+    def __call__(self, *args, **kwds):
+        """
+        Proxy method for hist(). The positional arguments passed to hist() are
+        the positional arguments given to the constructor concatenated with the
+        positional arguments given to this method. The keyword arguments for
+        hist() are the union of the keyword arguments passed to the constructor
+        and this method. The arguments passed to this method have precedence.
+
+        The method returns the return value of hist().
+        """
+
+
+def hist(dataframe, variable, bins, *stacks, data=None, selection=None,
+         range=None, color=None, blind=None,
+         axes=None, figure=None):
+    """
+    Creates a histogram of stacked processes. The first argument is the
+    dataframe to operate on. The 'variable' argument defines the x-axis. The
+    variable argument can be a Variable object or a string naming a column in
+    the dataframe.
+
+    The 'bins' argument can be an integer specifying the number of bins or a
+    list with all bin boundaries. If it is an integer, the argument range is
+    mandatory. The range argument must be a tuple with the lowest and highest
+    bin edge. The properties of a Variable object are used for the x- and
+    y-axis labels.
+
+    Stacks can contain process objects or lists of process objects. All items
+    in a list are stacked. The type attributes of processes are considered
+    during plotting.
+
+    The optional color must have the same structure as the stacked list
+    and defines the color of the process. For a list of stacked processes, the
+    corresponding list of colors can be replaced by a callable which is used
+    repeatedly for each process in the stack. The colors must be valid
+    matplotlib colors.
+
+    The optional blind argument controls which process should be blinded. The
+    argument can be a list of processes to blind. By default blinding is
+    applied to data. Use an empty list to disable blinding.
+
+    If the axes argument and/or figure arguments are omitted, this method
+    creates a new axes/figure.
+
+    The method returns (figure, axes) which were used during plotting. This
+    might be identical to the figure and axes arguments.
+    """
diff --git a/nnfwtbn/process.py b/nnfwtbn/process.py
new file mode 100644
index 0000000..4df7340
--- /dev/null
+++ b/nnfwtbn/process.py
@@ -0,0 +1,46 @@
+
+class Process:
+    """
+    This class represents a physics process to be selected during training and
+    plotting. The class stores the cuts to select the process' events from a
+    dataframe and its style and human-readable name for plotting.
+    """
+
+    DEFAULT_RANGE_VAR = 'fpid'
+
+    def __init__(self, label, selection=None, type="fill", range=None,
+                 range_var=None, **kwds):
+        r"""
+        Returns a new process object. The process has a human-readable name
+        (potentially using latex), a selection cut. The selection argument can
+        be a cut object or any callable. The optional argument 'type' defines
+        how the process is displayed in a histogram. Possible values are
+        'fill' and 'line'. Stacking of processes is handled by the plotting
+        method. The default value is 'fill'.
+
+        >>> Process("Top", lambda d: d.is_top)
+        <Process 'Top': (func)>
+
+        >>> Process("VBF", lambda d: d.is_VBFH)
+        <Process 'VBF': (func)>
+
+        The optional argument range accepts a two-value tuple and is a
+        shortcut to define a selection cut accepting events whose 'range_var'
+        is between (including boundaries) the given values. The range_var can
+        be a string naming a column in the dataframe or a Variable object.
+
+        >>> Process(r"Z\rightarrow\ell\ell", range=(-599, -500))
+        <Process 'Z\\rightarrow\\ell\\ell': (-599, -500)>
+
+        Other keyword arguments are passed on to matplotlib upon plotting.
+
+        >>> Process("VBF", lambda d: d.is_VBFH, linestyle="--")
+        <Process 'VBF': (func)>
+        """
+
+
+    def __repr__(self):
+        """
+        Returns a string representation of the process.
+        """
+
diff --git a/nnfwtbn/variable.py b/nnfwtbn/variable.py
new file mode 100644
index 0000000..6d43aac
--- /dev/null
+++ b/nnfwtbn/variable.py
@@ -0,0 +1,80 @@
+from abc import ABC, abstractmethod
+
+class Blinding(ABC):
+    """
+    The blinding class represents a blinding strategy. This is an abstract
+    base class. Sub-classes must implement the __call__ method.
+    """
+
+    @abstractmethod
+    def __call__(self, dataframe, variable, bins, selection, range=None):
+        """
+        Returns the final selection to apply in order to blind a process. The
+        first argument is the dataframe to operate on. The second argument is
+        the variable whose histogram should be blinded. The arguments bins and
+        range are identical to the ones for the hist method. They might be
+        used in sub-classes to align the blinding cuts to bin borders.
+
+        The method returns the logical AND of the selection argument and the
+        blinding cut.
+        """
+        raise NotImplementedError()
+
+class RangeBlinding(Blinding):
+    """
+    Concrete blinding strategy which removes all events within a certain
+    x-axis range. The range might be extended to match the bin borders.
+    """
+
+    def __init__(self, start, end):
+        """
+        Returns a new RangeBlinding object. When the object is called, it
+        returns a selection removing all events that lie between start and
+        end. The range might be extended to match bin borders.
+        """
+
+class Variable:
+    """
+    Representation of a quantity derived from the columns of a dataframe. The
+    variable can also directly represent a column of the dataframe.
+
+    The variable object defines a human-readable name for the variable and
+    its physical unit. The name and the unit are used for plotting and
+    labeling of axes.
+
+    >>> Variable("MMC", "ditau_mmc_mlm_m", "GeV")
+    <Variable: MMC [GeV]>
+    """
+
+    def __init__(self, name, definition, unit=None, blinding=None):
+        r"""
+        Returns a new variable object. The first argument is a human-readable
+        name (potentially using latex). The second argument defines the value
+        of the variable. This can be a string naming the column of the
+        dataframe or a callable that computes the value when a dataframe is
+        passed to it.
+
+        >>> Variable("MMC", "ditau_mmc_mlm_m", "GeV")
+        <Variable: MMC [GeV]>
+
+        >>> Variable(r"$\Delta \eta$", lambda df: df.jet_0_eta - df.jet_1_eta)
+        <Variable: $\Delta \eta$ >
+
+        The optional argument unit defines the unit of the variable. This
+        information is used for plotting, especially for labeling axes.
+
+        The optional blinding argument accepts a blinding object implementing
+        the blinding strategy.
+        """
+
+    def __call__(self, dataframe):
+        """
+        Returns an array or series of variable computed from the given
+        dataframe.
+        """
+
+
+    def __repr__(self):
+        """
+        Returns a string representation.
+        """
-- 
GitLab