From d5b1c6d63f15b115bba31f08d68146dca3ad5f00 Mon Sep 17 00:00:00 2001
From: Frank Sauerburger <f.sauerburger@cern.ch>
Date: Tue, 11 Jun 2019 00:18:48 +0200
Subject: [PATCH] Define hist, variable, process and blinding

---
 nnfwtbn/__init__.py |  5 +++
 nnfwtbn/plot.py     | 60 ++++++++++++++++++++++++++++++++++
 nnfwtbn/process.py  | 46 ++++++++++++++++++++++++++
 nnfwtbn/variable.py | 80 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 191 insertions(+)
 create mode 100644 nnfwtbn/plot.py
 create mode 100644 nnfwtbn/process.py
 create mode 100644 nnfwtbn/variable.py

diff --git a/nnfwtbn/__init__.py b/nnfwtbn/__init__.py
index 6c8e6b9..c1f4e23 100644
--- a/nnfwtbn/__init__.py
+++ b/nnfwtbn/__init__.py
@@ -1 +1,6 @@
 __version__ = "0.0.0"
+
+from .variable import Variable, RangeBlinding
+from .process import Process
+from .cut import Cut
+from .plot import HistogramFactory, hist
diff --git a/nnfwtbn/plot.py b/nnfwtbn/plot.py
new file mode 100644
index 0000000..32f3663
--- /dev/null
+++ b/nnfwtbn/plot.py
@@ -0,0 +1,60 @@
+
+class HistogramFactory:
+    """
+    Short-cut to create multiple histograms with the same set of processes
+    or in the same region.
+    """
+
+    def __init__(self, *args, **kwds):
+        """
+        Accepts any number of positional and keyword arguments. The arguments
+        are stored internally and used as defaults for hist(). See __call__().
+        """
+
+    def __call__(self, *args, **kwds):
+        """
+        Proxy method for hist(). The positional arguments passed to hist() are
+        the constructor's positional arguments followed by the ones given to
+        this method. The keyword arguments for hist() are the union of those
+        passed to the constructor and to this method; on conflict, the ones
+        passed to this method take precedence.
+
+        The method returns the return value of hist().
+        """
+
+
+def hist(dataframe, variable, bins, *stacks, data=None, selection=None,
+         range=None, axes=None, color=None, blind=None,
+         figure=None):
+    """
+    Creates a histogram of stacked processes. The first argument is the
+    dataframe to operate on. The 'variable' argument defines the x-axis. The
+    variable argument can be a Variable object or a string naming a column in
+    the dataframe.
+
+    The 'bins' argument can be an integer specifying the number of bins or a
+    list with all bin boundaries. If it is an integer, the argument range is
+    mandatory. The range argument must be a tuple with the lowest and highest
+    bin edge. The properties of a Variable object are used for the x- and
+    y-axis labels.
+
+    Stacks can contain process objects or lists of process objects. All items
+    in a list are stacked. The type attributes of processes are considered
+    during plotting.
+
+    The optional color must have the same structure as the stacked list and
+    defines the color of each process. For a list of stacked processes, the
+    corresponding list of colors can be replaced by a callable which is used
+    repeatedly for each process in the stack. The colors must be valid
+    matplotlib colors.
+
+    The optional blind argument controls which process should be blinded. The
+    argument can be a list of processes to blind. By default blinding is
+    applied to data. Use an empty list to disable blinding.
+
+    If the axes and/or figure arguments are omitted, this function creates a
+    new axes/figure.
+
+    The function returns the (figure, axes) used during plotting. These
+    might be identical to the figure and axes arguments.
+    """
diff --git a/nnfwtbn/process.py b/nnfwtbn/process.py
new file mode 100644
index 0000000..4df7340
--- /dev/null
+++ b/nnfwtbn/process.py
@@ -0,0 +1,46 @@
+
+class Process:
+    """
+    This class represents a physics process to be selected during training and
+    plotting. The class stores the cuts to select the process' events from a
+    dataframe as well as its style and human-readable name for plotting.
+    """
+
+    DEFAULT_RANGE_VAR = 'fpid'  # presumably the range_var fallback; confirm
+
+    def __init__(self, label, selection=None, type="fill", range=None, 
+                 range_var=None, **kwds):
+        r"""
+        Returns a new process object. The process has a human-readable name
+        (potentially using latex) and a selection cut. The selection argument
+        can be a cut object or any callable. The optional argument 'type'
+        defines how the process is displayed in a histogram. Possible values
+        are 'fill' and 'line'. Stacking of processes is handled by the
+        plotting method. The default value is 'fill'.
+
+        >>> Process("Top", lambda d: d.is_top)
+        <Process 'Top': (func)>
+
+        >>> Process("VBF", lambda d: d.is_VBFH)
+        <Process 'VBF': (func)>
+
+        The optional argument range accepts a two-value tuple and is a
+        shortcut to define a selection cut accepting events whose 'range_var'
+        is between (including boundaries) the given values. The range_var can
+        be a string naming a column in the dataframe or a Variable object.
+
+        >>> Process(r"Z\rightarrow\ell\ell", range=(-599, -500))
+        <Process 'Z\\rightarrow\\ell\\ell': (-599, -500)>
+
+        Any other keyword argument is passed directly to matplotlib upon
+        plotting.
+        >>> Process("VBF", lambda d: d.is_VBFH, linestyle="--")
+        <Process 'VBF': (func)>
+        """
+
+    
+    def __repr__(self):
+        """
+        Returns a string representation of the process.
+        """
+
diff --git a/nnfwtbn/variable.py b/nnfwtbn/variable.py
new file mode 100644
index 0000000..6d43aac
--- /dev/null
+++ b/nnfwtbn/variable.py
@@ -0,0 +1,80 @@
+from abc import abstractmethod
+
+
+class Blinding:
+    """
+    Abstract base class of all blinding strategies. Sub-classes must
+    implement the __call__ method.
+    """
+
+    @abstractmethod
+    def __call__(self, dataframe, variable, bins, selection, range=None):
+        """
+        Returns the final selection to apply in order to blind a process:
+        the logical AND of the selection argument and the blinding cut.
+
+        The first argument is the dataframe to operate on. The second is the
+        variable whose histogram should be blinded. The arguments bins and
+        range are identical to the ones of hist(); sub-classes might use them
+        to align the blinding cuts to bin borders.
+        """
+        raise NotImplementedError()
+        
+class RangeBlinding(Blinding):
+    """
+    Concrete blinding strategy which removes all events within a certain
+    x-axis range. The range might be extended to match the bin borders.
+    """
+
+    def __init__(self, start, end):
+        """
+        Returns a new RangeBlinding object. When the object is called, it
+        returns a selection removing all events that lie between start and
+        end. The range might be extended to match bin borders.
+        """
+
+class Variable:
+    """
+    Representation of a quantity derived from the columns of a dataframe. The
+    variable can also directly represent a column of the dataframe.
+
+    The variable object defines a human-readable name for the variable and
+    its physical unit. The name and the unit are used for plotting and
+    labeling of axes.
+
+    >>> Variable("MMC", "ditau_mmc_mlm_m", "GeV")
+    <Variable: MMC [GeV]>
+    """
+
+    def __init__(self, name, definition, unit=None, blinding=None):
+        r"""
+        Returns a new variable object. The first argument is a human-readable
+        name (potentially using latex). The second argument defines the value
+        of the variable. This can be a string naming the column of the
+        dataframe or a callable that computes the value when a dataframe is
+        passed to it.
+
+        >>> Variable("MMC", "ditau_mmc_mlm_m", "GeV")
+        <Variable: MMC [GeV]>
+
+        >>> Variable(r"$\Delta \eta$", lambda df: df.jet_0_eta - df.jet_1_eta)
+        <Variable: $\Delta \eta$ >
+
+        The optional argument unit defines the unit of the variable. This
+        information is used for plotting, especially for labeling axes.
+
+        The optional blinding argument accepts a blinding object implementing
+        the blinding strategy.
+        """ 
+
+    def __call__(self, dataframe):
+        """
+        Returns an array or series holding the values of the variable computed
+        from the given dataframe.
+        """
+
+
+    def __repr__(self):
+        """
+        Returns a string representation.
+        """
-- 
GitLab