Skip to content
Snippets Groups Projects
Verified Commit e2792acf authored by Frank Sauerburger's avatar Frank Sauerburger
Browse files

Use blinding strategy in hist()

parent 684ad551
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
from nnfwtbn import Variable, Process, Cut, hist, McStack, DataStack, Stack, \
RangeBlindingStrategy
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
```
%% Cell type:markdown id: tags:
# Processes
%% Cell type:code id: tags:
``` python
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))
p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
```
%% Cell type:markdown id: tags:
# Stacks
%% Cell type:code id: tags:
``` python
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
s_mc = McStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))
s_data = DataStack(p_asimov)
```
%% Cell type:markdown id: tags:
# Blinding
%% Cell type:code id: tags:
``` python
b_higgs_m = RangeBlindingStrategy(100, 150)
```
%% Cell type:markdown id: tags:
# Variables
%% Cell type:code id: tags:
``` python
v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV", blinding=b_higgs_m)
```
%% Cell type:markdown id: tags:
# Actual plot
Stacks passed to `blind` argument will be blinded according to blind strategy of variable `v_higgs_m`.
## Blind data stack
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, [s_mc, s_data], range=(0, 200),
weight="weight", ratio_label="Data / SM", blind=s_data, diff=True)
None
```
%% Cell type:markdown id: tags:
## Blind MC stack
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, [s_mc, s_data], range=(0, 200),
weight="weight", ratio_label="Data / SM", blind=s_mc)
None
```
%% Cell type:markdown id: tags:
## Blind both stacks
`blind` argument can be a single stack or a list of stacks.
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, [s_mc, s_data], range=(0, 200),
weight="weight", ratio_label="Data / SM", blind=[s_mc, s_data])
None
```
......@@ -10,9 +10,10 @@ import seaborn as sns
from atlasify import atlasify
from nnfwtbn.stack import Stack
from nnfwtbn.process import Process
from nnfwtbn.cut import Cut
from nnfwtbn.variable import Variable
from nnfwtbn.variable import Variable, BlindingStrategy
import nnfwtbn.error as err
# Store built-in range
......@@ -75,9 +76,9 @@ def hist(dataframe, variable, bins, stacks, selection=None,
Stacks must be Stack objects. The plotting style is defined via the stack
object.
The optional blind argument controls which process should be blinded. The
argument can be a single process, a list of processes or None. By default,
no process is blinded.
The optional blind argument controls which stack should be blinded. The
argument can be a single stack, a list of stacks or None. By default,
no stack is blinded.
If the figure argument is omitted, this method creates a new axes and
figure. If axes only is omitted, the method creates a new axes from the
......@@ -203,20 +204,32 @@ def hist(dataframe, variable, bins, stacks, selection=None,
numerator_hist = None
denominator_hist = None
if blind is None:
blind = []
elif isinstance(blind, Stack):
# Wrap scalar blind stack
blind = [blind]
# Handle stack
max_content = float('-inf')
for i_stack, stack in enumerate(stacks):
bottom = 0
uncertainty = np.array(0)
if stack in blind and variable.blinding is not None:
c_blind = variable.blinding(variable, bins)
else:
c_blind = lambda d: d
for i_process in range_(len(stack)):
process = stack.processes[i_process]
histogram = stack.get_hist(dataframe, i_process, bins, variable,
weight)
histogram = stack.get_hist(c_blind(dataframe), i_process, bins,
variable, weight)
# Draw uncertainty iff this is the last processes of the stack
if i_process == len(stack) - 1:
uncertainty = stack.get_total_uncertainty(dataframe, bins,
variable, weight)
uncertainty = stack.get_total_uncertainty(c_blind(dataframe),
bins, variable,
weight)
# Draw process
histtype = stack.get_histtype(i_process)
......@@ -224,8 +237,13 @@ def hist(dataframe, variable, bins, stacks, selection=None,
kwds = {'markersize': 4, 'fmt': 'o'}
kwds.update(stack.get_aux(i_process))
axes.errorbar(bin_centers, bottom + histogram, uncertainty,
bin_widths / 2, label=process.label, **kwds)
non_empty = (histogram != 0)
axes.errorbar(bin_centers[non_empty],
(bottom + histogram)[non_empty],
uncertainty[non_empty],
(bin_widths / 2)[non_empty],
label=process.label, **kwds)
else:
kwds = {}
kwds.update(stack.get_aux(i_process))
......@@ -256,7 +274,7 @@ def hist(dataframe, variable, bins, stacks, selection=None,
# Handle ratio plot
if draw_ratio:
_draw_ratio(dataframe, numerator, denominator, bins, variable, weight,
axes_ratio, diff, ratio_range, ratio_label)
axes_ratio, diff, ratio_range, ratio_label, blind)
# Draw vertical lines
for vline in vlines:
......@@ -327,27 +345,37 @@ def hist(dataframe, variable, bins, stacks, selection=None,
return figure, axes
def _draw_ratio(df, numerator, denominator, bins, variable, weight,
axes_ratio, diff=False, ratio_range=None, ratio_label=None):
axes_ratio, diff=False, ratio_range=None, ratio_label=None,
blind=[]):
numerator_hist = numerator.get_total(df, bins, variable, weight)
numerator_error = numerator.get_total_uncertainty(df, bins, variable, weight)
denominator_hist = denominator.get_total(df, bins, variable, weight)
denominator_error = denominator.get_total_uncertainty(df, bins, variable, weight)
if numerator in blind and variable.blinding is not None:
c_blind = variable.blinding(variable, bins)
else:
c_blind = lambda d: d
numerator_hist = numerator.get_total(c_blind(df), bins, variable, weight)
numerator_error = numerator.get_total_uncertainty(c_blind(df), bins, variable, weight)
if denominator in blind and variable.blinding is not None:
c_blind = variable.blinding(variable, bins)
else:
c_blind = lambda d: d
denominator_hist = denominator.get_total(c_blind(df), bins, variable, weight)
denominator_error = denominator.get_total_uncertainty(c_blind(df), bins, variable, weight)
# valid bins with non-zero denominator
idx = (denominator_hist != 0)
non_empty = (denominator_hist != 0) * (numerator_hist != 0)
if diff:
idx = (idx == idx)
non_empty = (numerator_hist != 0)
bin_centers = ((bins[1:] + bins[:-1]) / 2)[idx]
bin_widths = (bins[1:] - bins[:-1])[idx]
bin_centers = ((bins[1:] + bins[:-1]) / 2)[non_empty]
bin_widths = (bins[1:] - bins[:-1])[non_empty]
if diff:
ratio = numerator_hist[idx] - denominator_hist[idx]
ratio_error = numerator_error[idx]
ratio = numerator_hist[non_empty] - denominator_hist[non_empty]
ratio_error = numerator_error[non_empty]
else:
ratio = numerator_hist[idx] / denominator_hist[idx]
ratio_error = numerator_error[idx] / denominator_hist[idx]
ratio = numerator_hist[non_empty] / denominator_hist[non_empty]
ratio_error = numerator_error[non_empty] / denominator_hist[non_empty]
axes_ratio.errorbar(bin_centers,
ratio,
......@@ -357,18 +385,20 @@ def _draw_ratio(df, numerator, denominator, bins, variable, weight,
markersize=4, fmt='o')
if diff:
non_empty = np.ones(len(numerator_hist), dtype='bool')
rel_error = denominator_error
band_lower = 0 - rel_error
else:
denominator_hist[~idx] = 1
numerator_error[idx] = 0
non_empty = (denominator_hist != 0)
denominator_hist[~non_empty] = 1
numerator_error[non_empty] = 0
rel_error = denominator_error / denominator_hist
band_lower = 1 - rel_error
axes_ratio.hist(bins[:-1],
axes_ratio.hist(bins[:-1][non_empty],
bins=bins,
bottom=band_lower,
weights=rel_error * 2,
weights=(rel_error * 2)[non_empty],
fill=False,
hatch='/////',
linewidth=0,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment