diff --git a/freeforestml/model.py b/freeforestml/model.py
index de45ed5bda1113c178f76554caca077e7554f427..3f42c22e69850ea117657dbc185bc23a27b61a6d 100644
--- a/freeforestml/model.py
+++ b/freeforestml/model.py
@@ -7,6 +7,7 @@ import json
 
 import numpy as np
 import pandas as pd
+
 import tensorflow
 
 from freeforestml.variable import Variable
@@ -69,7 +70,7 @@ class CrossValidator(ABC):
         """
 
     @abstractmethod
-    def select_training(self, df, fold_i):
+    def select_training(self, df, fold_i, for_predicting=False):
         """
         Returns the index array to select all training events from the dataset for the
         given fold.
@@ -89,7 +90,7 @@ class CrossValidator(ABC):
         given fold.
         """
 
-    def select_cv_set(self, df, cv, fold_i):
+    def select_cv_set(self, df, cv, fold_i, for_predicting=False):
         """
         Returns the index array to select all events from the cross validator
         set specified with cv ('train', 'val', 'test') for the given fold.
@@ -98,7 +99,7 @@ class CrossValidator(ABC):
             raise ValueError("Argument 'cv' must be one of 'train', 'val', "
                              "'test', 'all'; but was %s." % repr(cv))
         if cv == "train":
-            selected = self.select_training(df, fold_i)
+            selected = self.select_training(df, fold_i, for_predicting=for_predicting)
         elif cv == "val":
             selected = self.select_validation(df, fold_i)
         else:
@@ -108,11 +109,15 @@ class CrossValidator(ABC):
     def retrieve_fold_info(self, df, cv):
         """
         Returns and array of integers to specify which event was used
-        for train/val/test in which fold
+        for train/val/test in which fold. This is mostly useful for the
+        inference/prediction step. For cross validators with a high number of
+        folds, where an event is used in the training set of several folds,
+        a single fold number is returned so that the folds are equally
+        represented in the predicted training data.
         """
         fold_info = np.zeros(len(df), dtype='bool') - 1
         for fold_i in range(self.k):
-            selected = self.select_cv_set(df, cv, fold_i)
+            selected = self.select_cv_set(df, cv, fold_i, for_predicting=True)
             fold_info[selected] = fold_i
         return fold_info
 
@@ -179,7 +184,7 @@ class ClassicalCV(CrossValidator):
             return (slice_id / (self.k * 2.0) <= variable) \
                    & (variable < (slice_id + 1.0) / (self.k * 2))
 
-    def select_training(self, df, fold_i):
+    def select_training(self, df, fold_i, for_predicting=False):
         """
         Returns the index array to select all training events from the dataset for the
         given fold.
@@ -190,7 +195,6 @@ class ClassicalCV(CrossValidator):
                 continue
 
             selected = selected | self.select_slice(df, slice_i)
-
         return selected
 
     def select_validation(self, df, fold_i):
@@ -207,9 +211,8 @@ class ClassicalCV(CrossValidator):
         """
         selected = np.zeros(len(df), dtype='bool')
         for slice_i in range(self.k, self.k * 2):
-
             selected = selected | self.select_slice(df, slice_i)
-
+
         return selected
 
 
@@ -247,7 +250,7 @@ class NoTestCV(CrossValidator):
             return (slice_id / self.k <= variable) \
                    & (variable < (slice_id + 1.0) / self.k)
 
-    def select_training(self, df, fold_i):
+    def select_training(self, df, fold_i, for_predicting=False):
         """
         Returns the index array to select all training events from the
         dataset. The fold_i parameter has no effect.
@@ -307,7 +310,7 @@ class BinaryCV(CrossValidator):
             return (slice_id / self.k <= variable) \
                    & (variable < (slice_id + 1.0) / self.k)
 
-    def select_training(self, df, fold_i):
+    def select_training(self, df, fold_i, for_predicting=False):
         """
         Returns the index array to select all training events from the dataset for the
         given fold.
@@ -357,18 +360,47 @@ class MixedCV(CrossValidator):
             return (slice_id / self.k <= variable) \
                    & (variable < (slice_id + 1.0) / self.k)
 
-    def select_training(self, df, fold_i):
+    def select_training_slices(self, fold_i, for_predicting=False):
+        """
+        Returns a list of integers corresponding to the data slices used to
+        train the fold 'fold_i'.
+        If 'for_predicting' is set to True, only one slice is returned for
+        each fold so that the folds are equally represented in the predicted
+        training data.
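+
+        For example, with k=5 this gives:
+            select_training_slices(1)                      -> [0, 1, 4]
+            select_training_slices(1, for_predicting=True) -> [1]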
+        """
+        all_slices_for_folds = []
+        for fold in range(self.k):
+            all_slices_for_folds.append([])
+            for slice_i in range(self.k):
+                if (slice_i + fold) % self.k == self.k - 1:
+                    continue
+                if (slice_i + fold) % self.k == self.k - 2:
+                    continue
+                all_slices_for_folds[-1].append(slice_i)
+
+        # if we only need the slices for training, we are done
+        if not for_predicting:
+            return all_slices_for_folds[fold_i]
+
+        # all_slices_for_folds looks e.g. like:
+        # [[0, 1, 2], [0, 1, 4], [0, 3, 4], [2, 3, 4], [1, 2, 3]]
+        # we need to select one unique slice per fold, e.g.:
+        # [0, 1, 2, 4, 3]
+        uniq_el = lambda ar: set(x for l in ar for x in l)
+        exclusive_slices = []
+        for i, slices in enumerate(all_slices_for_folds):
+            for sl in slices:
+                if sl not in exclusive_slices and sl in uniq_el(all_slices_for_folds[i:]):
+                    exclusive_slices.append(sl)
+        return [exclusive_slices[fold_i]]
+
+    def select_training(self, df, fold_i, for_predicting=False):
         """
         Returns the index array to select all training events from the dataset for the
         given fold.
         """
         selected = np.zeros(len(df), dtype='bool')
-        for slice_i in range(self.k):
-            if (slice_i + fold_i) % self.k == self.k - 1:
-                continue
-            if (slice_i + fold_i) % self.k == self.k - 2:
-                continue
-
+        slices = self.select_training_slices(fold_i, for_predicting=for_predicting)
+        for slice_i in slices:
             selected = selected | self.select_slice(df, slice_i)
 
         return selected
@@ -633,14 +665,20 @@ class HepNet:
 
         return True
 
-    def fit(self, df, weight=None, **kwds):
+    def fit(self, df, train_weight=None, event_weight=None, all_callbacks=(), **kwds):
         """
         Calls fit() on all folds. All kwds are passed to fit().
         """
-        if weight is None:
-            weight = Variable("unity", lambda d: np.ones(len(d)))
-        elif isinstance(weight, str):
-            weight = Variable(weight, weight)
+        # Two kinds of weights are supported: training (sample) weights that
+        # enter the loss in fit(), and plain event weights used by custom callbacks.
+        if train_weight is None:
+            train_weight = Variable("unity", lambda d: np.ones(len(d)))
+        elif isinstance(train_weight, str):
+            train_weight = Variable(train_weight, train_weight)
+        if event_weight is None:
+            event_weight = Variable("unity", lambda d: np.ones(len(d)))
+        elif isinstance(event_weight, str):
+            event_weight = Variable(event_weight, event_weight)
 
         ### Loop over folds:
         self.norms = []
@@ -666,15 +704,27 @@ class HepNet:
             model = self.model_cls()
             self.models.append(model)
 
-
+            # Check for and lazily initialize custom callbacks that need the
+            # per-fold validation data.
+            lazily_initialized_callbacks = []
+            lazily_initialized_callbacks_names = []
+            for cc in all_callbacks:
+                if cc == "Z0Callback":  # callback that monitors the significance
+                    lazily_initialized_callbacks.append(Z0Callback(
+                        validation_df[self.input_list],
+                        validation_df[self.output_list],
+                        # only use event weights, no training (sample) weights
+                        np.array(event_weight(validation_df))))
+                    lazily_initialized_callbacks_names.append(cc)
+            callbacks = ([c for c in all_callbacks
+                          if c not in lazily_initialized_callbacks_names]
+                         + lazily_initialized_callbacks)
+
             history = model.fit(training_df[self.input_list],
                                 training_df[self.output_list],
                                 validation_data=(
                                     validation_df[self.input_list],
                                     validation_df[self.output_list],
-                                    np.array(weight(validation_df)),
+                                    np.array(train_weight(validation_df)),
                                 ),
-                                sample_weight=np.array(weight(training_df)),
+                                sample_weight=np.array(train_weight(training_df)),
+                                callbacks=callbacks,
                                 **kwds)
 
             history = history.history
@@ -702,7 +752,7 @@ class HepNet:
             norm = self.norms[fold_i]
 
             # identify fold
-            selected = self.cv.select_cv_set(df, cv, fold_i)
+            selected = self.cv.select_cv_set(df, cv, fold_i, for_predicting=True)
 
             test_set |= selected
             out[selected] = model.predict(norm(df[selected][self.input_list]),
@@ -739,6 +789,9 @@ class HepNet:
         with h5py.File(path, "w") as output_file:
             # save default model class
             # since this is a arbitrary piece of python code we need to use the python_to_str function
+            # Note: this does not work with TensorFlow 2.0 when eager execution
+            # is disabled; the following error is thrown:
+            # NotImplementedError: numpy() is only available when eager execution is enabled.
             group = output_file.create_group("models/default")
             group.attrs["model_cls"] = np.string_(python_to_str(self.model_cls))
 
@@ -856,3 +909,37 @@ class HepNet:
                       f"{path_base}_vars_{fold_i}.json "
                       f"{path_base}_wght_{fold_i}.h5 "
                       f"> {path_base}_{fold_i}.json", file=script_file)
+
+class Z0Callback(tensorflow.keras.callbacks.Callback):
+    """
+    Keras callback that computes a binned significance Z0 on the validation
+    set after every epoch and appends it to the training history.
+    """
+
+    def __init__(self, X_valid=0, Y_valid=0, W_valid=0):
+        super().__init__()
+        self.X_valid = np.array(X_valid)
+        self.Y_valid = np.array(Y_valid)
+        self.W_valid = np.array(W_valid)
+        # reshape to a column vector so it can be masked like Y_valid and y_pred
+        self.W_valid = self.W_valid.reshape((self.W_valid.shape[0], 1))
+
+    def add_to_history(self, Z0):
+        if "Z0" in self.model.history.history.keys():
+            self.model.history.history["Z0"].append(Z0)
+        else: # first epoch
+            self.model.history.history["Z0"] = [Z0]
+
+    def on_epoch_end(self, epoch, logs=None):
+        y_pred = np.array(self.model.predict(self.X_valid, batch_size=4096))
+        w_bkg = self.W_valid[self.Y_valid == 0]
+        w_sig = self.W_valid[self.Y_valid == 1]
+        y_bkg = y_pred[self.Y_valid == 0]
+        y_sig = y_pred[self.Y_valid == 1]
+
+        c_sig, edges = np.histogram(y_sig, 20, weights=w_sig, range=(0, 1))
+        c_bkg, edges = np.histogram(y_bkg, 20, weights=w_bkg, range=(0, 1))
+
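+        # Per-bin Asimov significance Z0 = sqrt(2*((s+b)*ln(1+s/b) - s)),
+        # summed in quadrature over all histogram bins.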
+        Z0_func = lambda s, b: np.sqrt(2 * ((s + b) * np.log1p(s / b) - s))
+        z_list = [Z0_func(si, bi) for si, bi in zip(c_sig, c_bkg) if bi > 0 and si > 0]
+        Z0 = np.sqrt(np.sum(np.square(z_list)))
+
+        self.add_to_history(Z0)
+
+        print("\nINFO: Significance in epoch {} is Z0 = {}".format(epoch, Z0))
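+
+# A minimal usage sketch (not part of the library, names are illustrative):
+# assuming a configured HepNet instance `net` and a DataFrame `df` with a
+# 'weight' column, the callback is requested by name via `all_callbacks`:
+#
+#     net.fit(df, train_weight="weight", event_weight="weight",
+#             all_callbacks=["Z0Callback"], epochs=10)
+#
+# HepNet.fit() then instantiates Z0Callback lazily with the per-fold
+# validation inputs, targets and event weights before calling model.fit().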
diff --git a/freeforestml/plot.py b/freeforestml/plot.py
index 3eda8b4b8ed6fbbfcbb5a053dbf329eef346f08a..4a367a7819bff9723eea1f520e5986d0061333d1 100644
--- a/freeforestml/plot.py
+++ b/freeforestml/plot.py
@@ -333,9 +333,9 @@ def hist(dataframe, variable, bins, stacks, selection=None,
                           for yield_name in stack_item.yield_names]
         return sum(process_totals)
 
-    if y_log:
-        for stack in uhepp_obj.stacks:
-            stack.content.sort(key=lambda x: total_stackitem(uhepp_obj, x))
+    #if y_log:
+        #for stack in uhepp_obj.stacks:
+            #stack.content.sort(key=lambda x: total_stackitem(uhepp_obj, x))
 
     ##################
     # Vertical lines
@@ -579,7 +579,7 @@ def correlation_matrix(df, variables,
                         columns=[v.name for v in variables],
                         index=[v.name for v in variables])
 
-    sns.heatmap(data * 100, **dict(vmin=-100, vmax=100, cmap="RdBu_r", ax=axes,
+    res = sns.heatmap(data * 100, **dict(vmin=-100, vmax=100, cmap="RdBu_r", ax=axes,
                            cbar_kws={
                                'label': "Correlation coefficient $\\rho \\cdot 100$"
                                }
@@ -591,4 +591,4 @@ def correlation_matrix(df, variables,
         atlasify(*fill_labels(atlas, info), axes=axes, outside=True)
         figure.subplots_adjust(top=1/enlarge)
 
-    return data
+    return res, data
diff --git a/freeforestml/stack.py b/freeforestml/stack.py
index 0aba6319ba80725e335d7bde08c74157bf090d90..ba6511175b15866885961327ac2c2698e986d56e 100644
--- a/freeforestml/stack.py
+++ b/freeforestml/stack.py
@@ -68,7 +68,7 @@ class Stack:
         self.data_uncertainties.append(data_uncertainty)
         self.aux.append(aux)
 
-    def get_hist(self, df, i, bins, variable, weight, include_outside=False):
+    def get_hist(self, df, i, bins, variable, weight, range=None, include_outside=False):
         """
         Returns the yields per bin for the i-th process in the stack. The bins
         argument specifies the bin edges.
@@ -93,7 +93,7 @@ class Stack:
         else:
             func = np.histogram
 
-        total, _ = func(variable, bins=bins, weights=weight)
+        total, _ = func(variable, bins=bins, weights=weight, range=range)
         return total
 
     def get_total(self, df, bins, variable, weight, include_outside=False):
@@ -108,7 +108,7 @@ class Stack:
 
         return total
 
-    def get_uncertainty(self, df, i, bins, variable, weight,
+    def get_uncertainty(self, df, i, bins, variable, weight, range=None,
                               include_outside=False):
         """
         Returns the uncertainty of the total yield per bin. The bins argument
@@ -121,11 +121,11 @@ class Stack:
         uncertainty_2 = 0
 
         if self.is_data_uncertainty(i):
-            uncertainty_2 += self.get_hist(df, i, bins, variable, weight,
+            uncertainty_2 += self.get_hist(df, i, bins, variable, weight, range=range,
                                            include_outside=include_outside)
         else:
             uncertainty_2 += self.get_hist(df, i, bins, variable,
-                                           weight=weight_2,
+                                           weight=weight_2, range=range,
                                            include_outside=include_outside)
 
         return np.sqrt(uncertainty_2)
diff --git a/freeforestml/tests/test_model.py b/freeforestml/tests/test_model.py
index d7c9b809bd5fd749bedd6d0a1c92e24f172ced70..c596b988e1340719c46e29afb1ba90eb3d82b231 100644
--- a/freeforestml/tests/test_model.py
+++ b/freeforestml/tests/test_model.py
@@ -908,8 +908,7 @@ class HepNetTestCase(unittest.TestCase):
         df["is_sig"] = (df.fpid == 1)
         df["is_ztt"] = (df.fpid == 0)
 
-        net.fit(df.compute(), epochs=5, verbose=0, 
-                weight=Variable("weight", "weight"))
+        net.fit(df.compute(), epochs=5, verbose=0)
 
         fd, path = tempfile.mkstemp()