diff --git a/quickstats/analysis/event_categorization.py b/quickstats/analysis/event_categorization.py index 5c5dba329a410698470943a32e10a0a7573807d9..ceb86162f8ca42d6e2db995d242f3f08d0fec080 100644 --- a/quickstats/analysis/event_categorization.py +++ b/quickstats/analysis/event_categorization.py @@ -655,13 +655,13 @@ class EventCategorization(DataLoader): def initialize_df(self, df:pd.DataFrame, sample:str, apply_score:bool=True, event_indices:Optional[np.ndarray]=None): - + df = df.copy() # evaluate observable if not already exists self.apply_observable(df) # apply blinding if self.is_data_sample(sample) and self.do_blind: - df = self.get_blind_df(df.copy()) + df = self.get_blind_df(df) use_custom_events = event_indices is not None weight_name = self.names["weight"] @@ -692,7 +692,8 @@ class EventCategorization(DataLoader): # evaluate MVA score if apply_score: self.apply_score(df) - + return df + def get_samples_to_load(self, channel:Optional[str]=None): channel = self._parse_channel(channel) channel_config = self.channel_configs[channel] @@ -730,9 +731,9 @@ class EventCategorization(DataLoader): sample_event_indices = event_indices.get(sample, None) else: sample_event_indices = None - self.initialize_df(sample_df, sample=sample, - apply_score=apply_score, - event_indices=sample_event_indices) + sample_df = self.initialize_df(sample_df, sample=sample, + apply_score=apply_score, + event_indices=sample_event_indices) channel_df[sample] = sample_df self.channel_df = channel_df diff --git a/quickstats/clis/nuisance_parameter_tools.py b/quickstats/clis/nuisance_parameter_tools.py index 06d39e87a4b664d1c3776d6a8bcba8ad46991efe..29944d7d74b00922cc4324d86901df21e3fde9d9 100644 --- a/quickstats/clis/nuisance_parameter_tools.py +++ b/quickstats/clis/nuisance_parameter_tools.py @@ -224,6 +224,9 @@ def plot_pulls(**kwargs): @click.option('--plot_style', default="default", show_default=True, help='Plot style if save_plot is enabled. Choose between "default" and ' '"viridis". Alternatively, a path to a yaml config file can be used') +@click.option('--threshold', type=float, default=0., show_default=True, + help='Require at least one correlation (except itself) to be larger than this threshold' + ' value, otherwise not shown in plot.') @click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, help='Name of workspace. Auto-detect by default.') @click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, @@ -276,7 +279,7 @@ def np_correlation(**kwargs): """ _kwargs = {} for arg_name in ["basename", "save_plot", "save_json", "save_root", - "plot_style", "select", "remove"]: + "plot_style", "select", "remove", "threshold"]: _kwargs[arg_name] = kwargs.pop(arg_name) _init_kwargs = {} for arg_name in ["filename", "data_name", "verbosity"]: @@ -313,6 +316,10 @@ def np_correlation(**kwargs): if _kwargs['save_plot']: import matplotlib.pyplot as plt from quickstats.plots import CorrelationPlot + if _kwargs['threshold'] > 0: + cols_count = (df.abs() >= _kwargs['threshold']).sum(axis=0) + cols_to_keep = cols_count[cols_count > 1].index # the diagonal element is always 1 + df = df.loc[cols_to_keep, cols_to_keep] plotter = CorrelationPlot(df) plotter.draw_style(_kwargs['plot_style']) outname = basename + ".pdf" diff --git a/quickstats/clis/workspace_tools.py b/quickstats/clis/workspace_tools.py index 1c14c257251c46f12a4ef7d97e7c5077cbaba614..5673fdf896b007b634f4e6a7e05ca1940bbb3c30 100644 --- a/quickstats/clis/workspace_tools.py +++ b/quickstats/clis/workspace_tools.py @@ -65,6 +65,8 @@ def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_fi help='Enable minimizer offsetting.') @click.option('--offset/--no-offset', default=True, show_default=True, help='Offset likelihood.') +@click.option('--use-binned/--use-unbinned', default=False, show_default=True, + help='Whether to convert unbinned dataset to binned dataset.') @click.option('-c', '--num_cpu', type=int, default=1, show_default=True, help='Number of CPUs to use during minimization.') @click.option('--apply-fix/--do-not-apply-fix', default=False, show_default=True, diff --git a/quickstats/concurrent/parameterised_runner.py b/quickstats/concurrent/parameterised_runner.py index 355655e8b8171aca2c4fd6852b36e5a370de59e2..f1fcdd0677b781f90125a42d44f58f6aeab4b3fd 100644 --- a/quickstats/concurrent/parameterised_runner.py +++ b/quickstats/concurrent/parameterised_runner.py @@ -125,9 +125,9 @@ class ParameterisedRunner(AbstractRunner): def prepare_task_inputs(self)->Tuple[List, Dict]: raise NotImplementedError - def run(self): + def run(self, cache_only:bool=False): kwarg_set, auxiliary_args = self.prepare_task_inputs() - return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args) + return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args, cache_only=cache_only) def _end_of_instance_cleanup(self): ROOT.gROOT.CloseFiles() @@ -144,4 +144,4 @@ class ParameterisedRunner(AbstractRunner): components.append(setup) if not components: return None - return ",".join(components) \ No newline at end of file + return ",".join(components) diff --git a/quickstats/plots/template.py b/quickstats/plots/template.py index bc9ead3a3107608e7f4b73aa42b013d921207301..0be787dadec38a85848c812ef12be83759adfbd8 100644 --- a/quickstats/plots/template.py +++ b/quickstats/plots/template.py @@ -819,4 +819,4 @@ def remake_handles(handles:List, polygon_to_line:bool=True, fill_border:bool=Tru else: new_subhandles = tuple(new_subhandles) new_handles.append(new_subhandles) - return new_handles \ No newline at end of file + return new_handles diff --git a/quickstats/plots/upper_limit_1D_plot.py b/quickstats/plots/upper_limit_1D_plot.py index e4e1da3d7817aaca03b9b1aa7d64bee8d4e48e20..2724526fbd4db237911556dab1878314992ca0cc 100644 --- a/quickstats/plots/upper_limit_1D_plot.py +++ b/quickstats/plots/upper_limit_1D_plot.py @@ -139,8 +139,6 @@ class UpperLimit1DPlot(AbstractPlot): observed_handle = (handle_1, handle_2) if add_text: ax.text(text_pos['observed'], i + 0.5, f"{{:.{sig_fig}f}}".format(observed_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) else: @@ -150,8 +148,6 @@ class UpperLimit1DPlot(AbstractPlot): stat_limit = df['stat'] if add_text: ax.text(text_pos['stat'], i + 0.5, f"({{:.{sig_fig}f}})".format(stat_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) # draw expected @@ -160,8 +156,6 @@ class UpperLimit1DPlot(AbstractPlot): zorder=1.1, label=self.labels['expected']) if add_text: ax.text(text_pos['expected'], i + 0.5, f"{{:.{sig_fig}f}}".format(expected_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) # draw third @@ -171,8 +165,6 @@ class UpperLimit1DPlot(AbstractPlot): zorder=1.1, label=self.labels['third']) if add_text: ax.text(text_pos['third'], i + 0.5, f"{{:.{sig_fig}f}}".format(third_limit), - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) else: @@ -216,25 +208,17 @@ class UpperLimit1DPlot(AbstractPlot): if add_text: if draw_observed: ax.text(text_pos['observed'], n_category + 0.3, 'Obs.', - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) if draw_stat: ax.text(text_pos['stat'], n_category + 0.3, '(Stat.)', - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) if draw_third_column: ax.text(text_pos['third'], n_category + 0.3, draw_third_column, - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) ax.text(text_pos['expected'], n_category + 0.3, 'Exp.', - horizontalalignment='center', - verticalalignment='center', transform=transform, **self.styles['text']) if self.curve_data is not None: