From 3d96fa3240dfdb67663534942c391c90be292c57 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Tue, 19 Mar 2024 12:37:44 +0100 Subject: [PATCH 1/5] do not modify df in place --- quickstats/analysis/event_categorization.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/quickstats/analysis/event_categorization.py b/quickstats/analysis/event_categorization.py index 5c5dba32..ceb86162 100644 --- a/quickstats/analysis/event_categorization.py +++ b/quickstats/analysis/event_categorization.py @@ -655,13 +655,13 @@ class EventCategorization(DataLoader): def initialize_df(self, df:pd.DataFrame, sample:str, apply_score:bool=True, event_indices:Optional[np.ndarray]=None): - + df = df.copy() # evaluate observable if not already exists self.apply_observable(df) # apply blinding if self.is_data_sample(sample) and self.do_blind: - df = self.get_blind_df(df.copy()) + df = self.get_blind_df(df) use_custom_events = event_indices is not None weight_name = self.names["weight"] @@ -692,7 +692,8 @@ class EventCategorization(DataLoader): # evaluate MVA score if apply_score: self.apply_score(df) - + return df + def get_samples_to_load(self, channel:Optional[str]=None): channel = self._parse_channel(channel) channel_config = self.channel_configs[channel] @@ -730,9 +731,9 @@ class EventCategorization(DataLoader): sample_event_indices = event_indices.get(sample, None) else: sample_event_indices = None - self.initialize_df(sample_df, sample=sample, - apply_score=apply_score, - event_indices=sample_event_indices) + sample_df = self.initialize_df(sample_df, sample=sample, + apply_score=apply_score, + event_indices=sample_event_indices) channel_df[sample] = sample_df self.channel_df = channel_df -- GitLab From f7848bd34ffccb9cef5cb0178a17f514f94f8c9d Mon Sep 17 00:00:00 2001 From: Yizhou Cai <yizhou.cai@cern.ch> Date: Thu, 14 Mar 2024 18:09:49 +0100 Subject: [PATCH 2/5] apply threshold in correlation matrix --- quickstats/clis/nuisance_parameter_tools.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/quickstats/clis/nuisance_parameter_tools.py b/quickstats/clis/nuisance_parameter_tools.py index 06d39e87..5e90a667 100644 --- a/quickstats/clis/nuisance_parameter_tools.py +++ b/quickstats/clis/nuisance_parameter_tools.py @@ -224,6 +224,8 @@ def plot_pulls(**kwargs): @click.option('--plot_style', default="default", show_default=True, help='Plot style if save_plot is enabled. Choose between "default" and ' '"viridis". Alternatively, a path to a yaml config file can be used') +@click.option('--threshold', type=float, default=0., show_default=True, + help='Threshold for NP correlation matrix, below which the correlation is ignored when plotting.') @click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, help='Name of workspace. Auto-detect by default.') @click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, @@ -276,7 +278,7 @@ def np_correlation(**kwargs): """ _kwargs = {} for arg_name in ["basename", "save_plot", "save_json", "save_root", - "plot_style", "select", "remove"]: + "plot_style", "select", "remove", "threshold"]: _kwargs[arg_name] = kwargs.pop(arg_name) _init_kwargs = {} for arg_name in ["filename", "data_name", "verbosity"]: @@ -313,6 +315,10 @@ def np_correlation(**kwargs): if _kwargs['save_plot']: import matplotlib.pyplot as plt from quickstats.plots import CorrelationPlot + if _kwargs['threshold'] > 0: + cols_count = (df.abs() >= _kwargs['threshold']).sum(axis=0) + cols_to_keep = cols_count[cols_count > 1].index # the diagonal element is always 1 + df = df.loc[cols_to_keep, cols_to_keep] plotter = CorrelationPlot(df) plotter.draw_style(_kwargs['plot_style']) outname = basename + ".pdf" -- GitLab From 0c38dbe754c4080c40cc6316ee784527b2f00218 Mon Sep 17 00:00:00 2001 From: Yizhou Cai <yizhou.cai@cern.ch> Date: Fri, 15 Mar 2024 09:05:53 +0100 Subject: [PATCH 3/5] modify help --- quickstats/clis/nuisance_parameter_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/quickstats/clis/nuisance_parameter_tools.py b/quickstats/clis/nuisance_parameter_tools.py index 5e90a667..29944d7d 100644 --- a/quickstats/clis/nuisance_parameter_tools.py +++ b/quickstats/clis/nuisance_parameter_tools.py @@ -225,7 +225,8 @@ def plot_pulls(**kwargs): help='Plot style if save_plot is enabled. Choose between "default" and ' '"viridis". Alternatively, a path to a yaml config file can be used') @click.option('--threshold', type=float, default=0., show_default=True, - help='Threshold for NP correlation matrix, below which the correlation is ignored when plotting.') + help='Require at least one correlation (except itself) to be larger than this threshold' + ' value, otherwise not shown in plot.') @click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, help='Name of workspace. Auto-detect by default.') @click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, -- GitLab From 3e2647d052c3044d22e0286717ab7f0daba03340 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Mon, 18 Mar 2024 13:50:25 +0100 Subject: [PATCH 4/5] fix missing option in cli --- quickstats/clis/workspace_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quickstats/clis/workspace_tools.py b/quickstats/clis/workspace_tools.py index 1c14c257..5673fdf8 100644 --- a/quickstats/clis/workspace_tools.py +++ b/quickstats/clis/workspace_tools.py @@ -65,6 +65,8 @@ def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_fi help='Enable minimizer offsetting.') @click.option('--offset/--no-offset', default=True, show_default=True, help='Offset likelihood.') +@click.option('--use-binned/--use-unbinned', default=False, show_default=True, + help='Whether to convert unbinned dataset to binned dataset.') @click.option('-c', '--num_cpu', type=int, default=1, show_default=True, help='Number of CPUs to use during minimization.') @click.option('--apply-fix/--do-not-apply-fix', default=False, show_default=True, -- GitLab From bbd582761df1708b62a82c825d88daced30ba1d2 Mon Sep 17 00:00:00 2001 From: Rui Zhang <rui.zhang@cern.ch> Date: Tue, 19 Mar 2024 11:56:45 +0100 Subject: [PATCH 5/5] Minor fix on cache_only option and fix on conflicts on plotting options --- quickstats/concurrent/parameterised_runner.py | 6 +++--- quickstats/plots/template.py | 2 +- quickstats/plots/upper_limit_1D_plot.py | 16 ---------------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/quickstats/concurrent/parameterised_runner.py b/quickstats/concurrent/parameterised_runner.py index 355655e8..f1fcdd06 100644 --- a/quickstats/concurrent/parameterised_runner.py +++ b/quickstats/concurrent/parameterised_runner.py @@ -125,9 +125,9 @@ class ParameterisedRunner(AbstractRunner): def prepare_task_inputs(self)->Tuple[List, Dict]: raise NotImplementedError - def run(self): + def run(self, cache_only:bool=False): kwarg_set, auxiliary_args = self.prepare_task_inputs() - return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args) + return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args, cache_only=cache_only) def _end_of_instance_cleanup(self): ROOT.gROOT.CloseFiles() @@ -144,4 +144,4 @@ class ParameterisedRunner(AbstractRunner): components.append(setup) if not components: return None - return ",".join(components) \ No newline at end of file + return ",".join(components) diff --git a/quickstats/plots/template.py b/quickstats/plots/template.py index bc9ead3a..0be787da 100644 --- a/quickstats/plots/template.py +++ b/quickstats/plots/template.py @@ -819,4 +819,4 @@ def remake_handles(handles:List, polygon_to_line:bool=True, fill_border:bool=Tru else: new_subhandles = tuple(new_subhandles) new_handles.append(new_subhandles) - return new_handles \ No newline at end of file + return new_handles diff --git a/quickstats/plots/upper_limit_1D_plot.py b/quickstats/plots/upper_limit_1D_plot.py index e4e1da3d..2724526f 100644 --- a/quickstats/plots/upper_limit_1D_plot.py +++ b/quickstats/plots/upper_limit_1D_plot.py @@ -139,8 +139,6 @@ class UpperLimit1DPlot(AbstractPlot): observed_handle = (handle_1, handle_2) if add_text: ax.text(text_pos['observed'], i + 0.5, f"{{:.{sig_fig}f}}".format(observed_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) else: @@ -150,8 +148,6 @@ class UpperLimit1DPlot(AbstractPlot): stat_limit = df['stat'] if add_text: ax.text(text_pos['stat'], i + 0.5, f"({{:.{sig_fig}f}})".format(stat_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) # draw expected @@ -160,8 +156,6 @@ class UpperLimit1DPlot(AbstractPlot): zorder=1.1, label=self.labels['expected']) if add_text: ax.text(text_pos['expected'], i + 0.5, f"{{:.{sig_fig}f}}".format(expected_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) # draw third @@ -171,8 +165,6 @@ class UpperLimit1DPlot(AbstractPlot): zorder=1.1, label=self.labels['third']) if add_text: ax.text(text_pos['third'], i + 0.5, f"{{:.{sig_fig}f}}".format(third_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) else: @@ -216,25 +208,17 @@ class UpperLimit1DPlot(AbstractPlot): if add_text: if draw_observed: ax.text(text_pos['observed'], n_category + 0.3, 'Obs.', - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) if draw_stat: ax.text(text_pos['stat'], n_category + 0.3, '(Stat.)', - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) if draw_third_column: ax.text(text_pos['third'], n_category + 0.3, draw_third_column, - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) ax.text(text_pos['expected'], n_category + 0.3, 'Exp.', - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) if self.curve_data is not None: -- GitLab