From 3d96fa3240dfdb67663534942c391c90be292c57 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Tue, 19 Mar 2024 12:37:44 +0100
Subject: [PATCH 1/5] initialize_df: work on a copy instead of modifying df in place

---
 quickstats/analysis/event_categorization.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/quickstats/analysis/event_categorization.py b/quickstats/analysis/event_categorization.py
index 5c5dba32..ceb86162 100644
--- a/quickstats/analysis/event_categorization.py
+++ b/quickstats/analysis/event_categorization.py
@@ -655,13 +655,13 @@ class EventCategorization(DataLoader):
     def initialize_df(self, df:pd.DataFrame, sample:str,
                       apply_score:bool=True,
                       event_indices:Optional[np.ndarray]=None):
-        
+        df = df.copy()
         # evaluate observable if not already exists
         self.apply_observable(df)
         
         # apply blinding
         if self.is_data_sample(sample) and self.do_blind:
-            df = self.get_blind_df(df.copy())
+            df = self.get_blind_df(df)
         
         use_custom_events = event_indices is not None
         weight_name = self.names["weight"]
@@ -692,7 +692,8 @@ class EventCategorization(DataLoader):
         # evaluate MVA score
         if apply_score:
             self.apply_score(df)
-
+        return df
+        
     def get_samples_to_load(self, channel:Optional[str]=None):
         channel = self._parse_channel(channel)
         channel_config = self.channel_configs[channel]
@@ -730,9 +731,9 @@ class EventCategorization(DataLoader):
                 sample_event_indices = event_indices.get(sample, None)
             else:
                 sample_event_indices = None
-            self.initialize_df(sample_df, sample=sample,
-                               apply_score=apply_score,
-                               event_indices=sample_event_indices)
+            sample_df = self.initialize_df(sample_df, sample=sample,
+                                           apply_score=apply_score,
+                                           event_indices=sample_event_indices)
             channel_df[sample] = sample_df
         self.channel_df = channel_df
     
-- 
GitLab


From f7848bd34ffccb9cef5cb0178a17f514f94f8c9d Mon Sep 17 00:00:00 2001
From: Yizhou Cai <yizhou.cai@cern.ch>
Date: Thu, 14 Mar 2024 18:09:49 +0100
Subject: [PATCH 2/5] apply threshold in correlation matrix

---
 quickstats/clis/nuisance_parameter_tools.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/quickstats/clis/nuisance_parameter_tools.py b/quickstats/clis/nuisance_parameter_tools.py
index 06d39e87..5e90a667 100644
--- a/quickstats/clis/nuisance_parameter_tools.py
+++ b/quickstats/clis/nuisance_parameter_tools.py
@@ -224,6 +224,8 @@ def plot_pulls(**kwargs):
 @click.option('--plot_style', default="default", show_default=True,
               help='Plot style if save_plot is enabled. Choose between "default" and '
                    '"viridis". Alternatively, a path to a yaml config file can be used')
+@click.option('--threshold', type=float, default=0., show_default=True,
+              help='Threshold for NP correlation matrix, below which the correlation is ignored when plotting.')
 @click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
               help='Name of workspace. Auto-detect by default.')
 @click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
@@ -276,7 +278,7 @@ def np_correlation(**kwargs):
     """
     _kwargs = {}
     for arg_name in ["basename", "save_plot", "save_json", "save_root",
-                     "plot_style", "select", "remove"]:
+                     "plot_style", "select", "remove", "threshold"]:
         _kwargs[arg_name] = kwargs.pop(arg_name)
     _init_kwargs = {}
     for arg_name in ["filename", "data_name", "verbosity"]:
@@ -313,6 +315,10 @@ def np_correlation(**kwargs):
         if _kwargs['save_plot']:
             import matplotlib.pyplot as plt
             from quickstats.plots import CorrelationPlot
+            if _kwargs['threshold'] > 0:
+                cols_count = (df.abs() >= _kwargs['threshold']).sum(axis=0)
+                cols_to_keep = cols_count[cols_count > 1].index # the diagonal element is always 1
+                df = df.loc[cols_to_keep, cols_to_keep]
             plotter = CorrelationPlot(df)
             plotter.draw_style(_kwargs['plot_style'])
             outname = basename + ".pdf"
-- 
GitLab


From 0c38dbe754c4080c40cc6316ee784527b2f00218 Mon Sep 17 00:00:00 2001
From: Yizhou Cai <yizhou.cai@cern.ch>
Date: Fri, 15 Mar 2024 09:05:53 +0100
Subject: [PATCH 3/5] Clarify help text of the --threshold option

---
 quickstats/clis/nuisance_parameter_tools.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/quickstats/clis/nuisance_parameter_tools.py b/quickstats/clis/nuisance_parameter_tools.py
index 5e90a667..29944d7d 100644
--- a/quickstats/clis/nuisance_parameter_tools.py
+++ b/quickstats/clis/nuisance_parameter_tools.py
@@ -225,7 +225,8 @@ def plot_pulls(**kwargs):
               help='Plot style if save_plot is enabled. Choose between "default" and '
                    '"viridis". Alternatively, a path to a yaml config file can be used')
 @click.option('--threshold', type=float, default=0., show_default=True,
-              help='Threshold for NP correlation matrix, below which the correlation is ignored when plotting.')
+              help='Require at least one correlation (except itself) to be larger than this threshold'
+                    ' value, otherwise not shown in plot.')
 @click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
               help='Name of workspace. Auto-detect by default.')
 @click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
-- 
GitLab


From 3e2647d052c3044d22e0286717ab7f0daba03340 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Mon, 18 Mar 2024 13:50:25 +0100
Subject: [PATCH 4/5] Add missing --use-binned/--use-unbinned option to workspace_tools CLI

---
 quickstats/clis/workspace_tools.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/quickstats/clis/workspace_tools.py b/quickstats/clis/workspace_tools.py
index 1c14c257..5673fdf8 100644
--- a/quickstats/clis/workspace_tools.py
+++ b/quickstats/clis/workspace_tools.py
@@ -65,6 +65,8 @@ def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_fi
               help='Enable minimizer offsetting.')
 @click.option('--offset/--no-offset', default=True, show_default=True,
               help='Offset likelihood.')
+@click.option('--use-binned/--use-unbinned', default=False, show_default=True,
+              help='Whether to convert unbinned dataset to binned dataset.')
 @click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
               help='Number of CPUs to use during minimization.')
 @click.option('--apply-fix/--do-not-apply-fix', default=False, show_default=True,
-- 
GitLab


From bbd582761df1708b62a82c825d88daced30ba1d2 Mon Sep 17 00:00:00 2001
From: Rui Zhang <rui.zhang@cern.ch>
Date: Tue, 19 Mar 2024 11:56:45 +0100
Subject: [PATCH 5/5] Pass cache_only through ParameterisedRunner.run and
 resolve conflicting plotting options in UpperLimit1DPlot

---
 quickstats/concurrent/parameterised_runner.py |  6 +++---
 quickstats/plots/template.py                  |  2 +-
 quickstats/plots/upper_limit_1D_plot.py       | 16 ----------------
 3 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/quickstats/concurrent/parameterised_runner.py b/quickstats/concurrent/parameterised_runner.py
index 355655e8..f1fcdd06 100644
--- a/quickstats/concurrent/parameterised_runner.py
+++ b/quickstats/concurrent/parameterised_runner.py
@@ -125,9 +125,9 @@ class ParameterisedRunner(AbstractRunner):
     def prepare_task_inputs(self)->Tuple[List, Dict]:
         raise NotImplementedError
     
-    def run(self):
+    def run(self, cache_only:bool=False):
         kwarg_set, auxiliary_args = self.prepare_task_inputs()
-        return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args)
+        return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args, cache_only=cache_only)
 
     def _end_of_instance_cleanup(self):
         ROOT.gROOT.CloseFiles()
@@ -144,4 +144,4 @@ class ParameterisedRunner(AbstractRunner):
             components.append(setup)
         if not components:
             return None
-        return ",".join(components)
\ No newline at end of file
+        return ",".join(components)
diff --git a/quickstats/plots/template.py b/quickstats/plots/template.py
index bc9ead3a..0be787da 100644
--- a/quickstats/plots/template.py
+++ b/quickstats/plots/template.py
@@ -819,4 +819,4 @@ def remake_handles(handles:List, polygon_to_line:bool=True, fill_border:bool=Tru
         else:
             new_subhandles = tuple(new_subhandles)
         new_handles.append(new_subhandles)
-    return new_handles
\ No newline at end of file
+    return new_handles
diff --git a/quickstats/plots/upper_limit_1D_plot.py b/quickstats/plots/upper_limit_1D_plot.py
index e4e1da3d..2724526f 100644
--- a/quickstats/plots/upper_limit_1D_plot.py
+++ b/quickstats/plots/upper_limit_1D_plot.py
@@ -139,8 +139,6 @@ class UpperLimit1DPlot(AbstractPlot):
                 observed_handle = (handle_1, handle_2)
                 if add_text:
                     ax.text(text_pos['observed'], i + 0.5, f"{{:.{sig_fig}f}}".format(observed_limit),
-                            horizontalalignment='center',
-                            verticalalignment='center',
                             transform=transform,
                             **self.styles['text'])
             else:
@@ -150,8 +148,6 @@ class UpperLimit1DPlot(AbstractPlot):
                 stat_limit = df['stat']
                 if add_text:
                     ax.text(text_pos['stat'], i + 0.5, f"({{:.{sig_fig}f}})".format(stat_limit),
-                            horizontalalignment='center',
-                            verticalalignment='center',
                             transform=transform,
                             **self.styles['text'])
             # draw expected
@@ -160,8 +156,6 @@ class UpperLimit1DPlot(AbstractPlot):
                                         zorder=1.1, label=self.labels['expected'])
             if add_text:
                 ax.text(text_pos['expected'], i + 0.5, f"{{:.{sig_fig}f}}".format(expected_limit),
-                        horizontalalignment='center',
-                        verticalalignment='center',
                         transform=transform,
                         **self.styles['text'])
             # draw third
@@ -171,8 +165,6 @@ class UpperLimit1DPlot(AbstractPlot):
                                          zorder=1.1, label=self.labels['third'])
                 if add_text:
                     ax.text(text_pos['third'], i + 0.5, f"{{:.{sig_fig}f}}".format(third_limit),
-                            horizontalalignment='center',
-                            verticalalignment='center',
                             transform=transform,
                             **self.styles['text'])
             else:
@@ -216,25 +208,17 @@ class UpperLimit1DPlot(AbstractPlot):
         if add_text:
             if draw_observed:
                 ax.text(text_pos['observed'], n_category + 0.3, 'Obs.',
-                        horizontalalignment='center',
-                        verticalalignment='center',
                         transform=transform,
                         **self.styles['text'])
             if draw_stat:
                 ax.text(text_pos['stat'], n_category + 0.3, '(Stat.)',
-                        horizontalalignment='center',
-                        verticalalignment='center',
                         transform=transform,
                         **self.styles['text'])
             if draw_third_column:
                 ax.text(text_pos['third'], n_category + 0.3, draw_third_column,
-                        horizontalalignment='center',
-                        verticalalignment='center',
                         transform=transform,
                         **self.styles['text'])
             ax.text(text_pos['expected'], n_category + 0.3, 'Exp.',
-                    horizontalalignment='center',
-                    verticalalignment='center',
                     transform=transform,
                     **self.styles['text'])
         if self.curve_data is not None:
-- 
GitLab