From 4f9a0030588811716fc392873e365a75a4ecb93c Mon Sep 17 00:00:00 2001 From: Frank Sauerburger <f.sauerburger@cern.ch> Date: Wed, 26 Jun 2019 21:57:06 +0200 Subject: [PATCH] Add hist styling draft --- histogram.ipynb | 15 ++++++------- nnfwtbn/plot.py | 56 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/histogram.ipynb b/histogram.ipynb index 093de90..2537a3a 100644 --- a/histogram.ipynb +++ b/histogram.ipynb @@ -11,11 +11,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", + "import seaborn as sns\n", "df = pd.read_hdf(\"test.h5\")\n", "#df = pd.read_hdf(\"demo/mva.h5\")" ] @@ -31,7 +32,7 @@ "p_zll = Process(r\"$Z\\rightarrow\\ell\\ell$\", range=(-599, -500))\n", "p_fake = Process(r\"Fake\", range=(-199, -100))\n", "\n", - "p_sig = Process(r\"Signal\", range=(1, 1000), type='line')" + "p_sig = Process(r\"Signal\", range=(1, 1000))" ] }, { @@ -58,14 +59,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "<matplotlib.figure.Figure at 0x7ff6db317208>" + "<matplotlib.figure.Figure at 0x7f855ba95630>" ] }, "metadata": {}, @@ -73,8 +74,8 @@ } ], "source": [ - "hist(df, v_mmc, 40, [p_fake, p_top, p_zll, p_ztt, p_sig], range=(0, 200), selection=c_vbf,\n", - " weight=\"weight\")\n", + "hist(df, v_mmc, 40, [p_fake, p_top, p_zll, p_ztt], [p_sig], range=(0, 200), selection=c_vbf,\n", + " weight=\"weight\", color=[sns.color_palette(\"Blues\"), sns.color_palette()[1:]])\n", "None" ] }, diff --git a/nnfwtbn/plot.py b/nnfwtbn/plot.py index 1f9591c..9167e6c 100644 --- a/nnfwtbn/plot.py +++ b/nnfwtbn/plot.py @@ -46,9 +46,9 @@ def _type_to_histtype(type): type_map = {"fill": "stepfilled", "line": "step"} return type_map[type] -def hist(dataframe, variable, bins, *stacks, data=None, 
selection=None, - range=None, color=None, blind=None, axes=None, figure=None, - weight=None): +def hist(dataframe, variable, bins, *stacks, selection=None, + range=None, blind=None, axes=None, figure=None, + weight=None, **kwds): """ Creates a histogram of stacked processes. The first argument is the dataframe to operate on. The 'variable' argument defines the x-axis. The @@ -65,15 +65,9 @@ def hist(dataframe, variable, bins, *stacks, data=None, selection=None, in a list are stacked. The type attributes of processes are considered during plotting. - The optional color must have the with same structure as the stacked list - and defines the color of the process. For a list of stacked processes, the - corresponding list of colors can be replaced by a callable which is used - repeatedly for each process in the stack. The colors must be valid - matplotlib colors. - The optional blind argument controls which process should be blinded. The - argument can be a list of processes to blind. By default blinding is - applied to data. Use an empty list to disable blinding. + argument can be a single process, a list of processes or None. By default, + no process is blinded. If the figure argument is omitted, this method creates a new axes and figure. If axes only is omitted, the method creates a new axes from the @@ -85,6 +79,15 @@ def hist(dataframe, variable, bins, *stacks, data=None, selection=None, The weight is used to weight the entries. Entries have unit weight if omitted. The argument can be a string name of a column or a variable object. + + Any other keyword argument is used to define the style. If the keyword + argument is a list, its length must equal the number of stacks. If the + list item is a list, it is cycled for every stack member. If the list + member is not a list, this value is used for all members of the stack. + If the keyword is not a list, the property is passed directly to + matplotlib. The only exception is 'histtype'. 
The histtype argument can be + 'step', 'stepfilled' or 'points'. In the former two cases matplotlib's hist + method is used, in the latter case 'errorbar' is used. """ # Wrap column string by variable if isinstance(variable, str): @@ -119,22 +122,45 @@ def hist(dataframe, variable, bins, *stacks, data=None, selection=None, bins = np.linspace(range[0], range[1], bins + 1) equidistant_bins = True + # Check structure of kwds + new_kwds = {} + for kwd, value in kwds.items(): + if isinstance(value, list): + if len(value) != len(stacks): + raise ValueError("Length of %s must equal number of stacks." + % repr(kwd)) + + # Wrap properties for the whole stack in a list + new_kwds[kwd] = [(_ if isinstance(_, list) else [_]) for _ in value] + + else: + # Single value for all stacks and all processes + new_kwds[kwd] = [[value]]*len(stacks) + kwds = new_kwds + # Handle stack - for stack in stacks: + for i_stack, stack in enumerate(stacks): if isinstance(stack, Process): # Wrap single process stack = [stack] bottom = np.zeros(len(bins) - 1) - for process in stack: + for i_process, process in enumerate(stack): sel = selection & process.selection + # Prepare style + process_kwds = {} + for kwd, props in kwds.items(): + # Cycle through property stacks + stack_props = props[i_stack] + process_kwds[kwd] = stack_props[i_process % len(stack_props)] + n, _, _ = axes.hist(variable(dataframe[sel(dataframe)]), bins=bins, range=range, bottom=bottom, label=process.label, - histtype=_type_to_histtype(process.type), - weights=weight(dataframe[sel(dataframe)])) + weights=weight(dataframe[sel(dataframe)]), + **process_kwds) bottom += n axes.set_xlim((bins.min(), bins.max())) -- GitLab