diff --git a/.ci/bbtautau/klambda.json b/.ci/bbtautau/klambda.json index c81d2e2d4141d62b15fe9618bf5ba46c36889694..7d94124ef835b838eace82109f416306dbc01fb4 100644 --- a/.ci/bbtautau/klambda.json +++ b/.ci/bbtautau/klambda.json @@ -1,27 +1,27 @@ { - "nll": [ - 290961.64047893154, - 290961.7870285931, - 290961.65236822236 - ], - "qmu": [ - 0.0, - 0.2930993231711909, - 0.02377858164254576 - ], - "status": [ - 0, - 0, - 0 - ], - "klambda": [ - 1.497080183188742, - 0, - 1 - ], - "mu": [ - 1.497080183188742, - 0, - 1 - ] + "nll": [ + 290961.64047893154, + 290961.7870285931, + 290961.65236822236 + ], + "qmu": [ + 0.0, + 0.2930993231711909, + 0.02377858164254576 + ], + "status": [ + 0, + 0, + 0 + ], + "klambda": [ + 1.497080183188742, + 0, + 1 + ], + "mu": [ + 1.497080183188742, + 0, + 1 + ] } \ No newline at end of file diff --git a/.ci/bbyy/klambda.json b/.ci/bbyy/klambda.json index 7bc3217f3080c2bdd9d7cedca4d1272ffcbe0053..112fe12cf5654db3d39ec2a1e360ed85297751f1 100644 --- a/.ci/bbyy/klambda.json +++ b/.ci/bbyy/klambda.json @@ -1,27 +1,27 @@ { - "nll": [ - 517.9102262602762, - 518.7035184306969, - 518.2302598267265 - ], - "qmu": [ - 0.0, - 1.5865843408414548, - 0.64006713290064 - ], - "status": [ - 0, - 0, - 0 - ], - "klambda": [ - 2.7511436422832256, - 0, - 1 - ], - "mu": [ - 2.7511436422832256, - 0, - 1 - ] + "nll": [ + 517.9102262602762, + 518.7035184306969, + 518.2302598267265 + ], + "qmu": [ + 0.0, + 1.5865843408414548, + 0.64006713290064 + ], + "status": [ + 0, + 0, + 0 + ], + "klambda": [ + 2.7511436422832256, + 0, + 1 + ], + "mu": [ + 2.7511436422832256, + 0, + 1 + ] } \ No newline at end of file diff --git a/bin/quickstats b/bin/quickstats index cb8479db8c7e480f16c8efe85090b25573067131..c7867dbe4c5a21931235fade30b78c9d9654705d 100755 --- a/bin/quickstats +++ b/bin/quickstats @@ -1,44 +1,6 @@ #! 
/usr/bin/env python -import click -from quickstats.clis.core import (run_pulls, plot_pulls, compile_macros, - harmonize_np, cls_limit, generate_asimov, - generate_standard_asimov, toy_significance, - toy_limit, add_macro, remove_macro) -from quickstats.clis.inspect_ws import inspect_ws -from quickstats.clis.inspect_rfile import inspect_rfile -from quickstats.clis.limit_setting import limit_scan -from quickstats.clis.processor_cli import process_rfile -from quickstats.clis.likelihood_scan import likelihood_scan -from quickstats.clis.likelihood_fit import likelihood_fit, np_correlation -from quickstats.clis.workspace_tools import build_xml_ws, modify_ws, compare_ws, combine_ws, decompose_ws - -@click.group() -def cli(): - pass +from quickstats.clis import cli if __name__ == "__main__": - cli.add_command(compile_macros) - cli.add_command(add_macro) - cli.add_command(remove_macro) - cli.add_command(inspect_ws) - cli.add_command(toy_significance) - cli.add_command(toy_limit) - cli.add_command(run_pulls) - cli.add_command(plot_pulls) - cli.add_command(likelihood_fit) - cli.add_command(likelihood_scan) - cli.add_command(cls_limit) - cli.add_command(limit_scan) - cli.add_command(np_correlation) - cli.add_command(harmonize_np) - cli.add_command(generate_asimov) - cli.add_command(generate_standard_asimov) - cli.add_command(inspect_rfile) - cli.add_command(process_rfile) - cli.add_command(build_xml_ws) - cli.add_command(modify_ws) - cli.add_command(compare_ws) - cli.add_command(combine_ws) - cli.add_command(decompose_ws) cli() \ No newline at end of file diff --git a/quickstats/analysis/analysis_path_manager.py b/quickstats/analysis/analysis_path_manager.py index a3dfef7f9304dad4a9fc5d6f3e2a432007503559..911a59e74eb422219efe6c846c21f36591d97b1a 100644 --- a/quickstats/analysis/analysis_path_manager.py +++ b/quickstats/analysis/analysis_path_manager.py @@ -58,24 +58,21 @@ class AnalysisPathManager(PathManager): self.study_name = study_name self.raw_base_path = base_path self.update() + + def get_base_path(self): + if self.study_name is None: + return self.base_path + if self.base_path is None: + return self.study_name + return os.path.join(self.base_path, self.study_name) def update(self): - if self.raw_base_path is None: - if self.study_name is None: - base_path = None - else: - base_path = self.study_name - else: - if self.study_name is None: - base_path = self.raw_base_path - else: - base_path = os.path.join(self.raw_base_path, self.study_name) - self.base_path = base_path + pass def set_study_name(self, study_name:str): self.study_name = study_name self.update() def set_base_path(self, base_path:str): - self.raw_base_path = base_path + self.base_path = base_path self.update() \ No newline at end of file diff --git a/quickstats/analysis/event_categorization.py b/quickstats/analysis/event_categorization.py index b8e31b1b2ef88431b50824b8e0ed41033da5ea30..763e93e5bafcaf6f53b30465655c7ed15481356b 100644 --- a/quickstats/analysis/event_categorization.py +++ b/quickstats/analysis/event_categorization.py @@ -1094,12 +1094,12 @@ class EventCategorization(DataLoader): for sample in samples: category_df = self.get_category_df(sample, category_index) key = sample - if do_blind: + if self.do_blind: key = self.get_blind_sample_name(sample) if resampling: category_df = self.get_resampled_df(category_df, random_state=resampling_random_state) yields[category_name][key] = float(category_df[weight_name].sum()) - if is_data: + if self.is_data_sample(sample): #calculate poisson interval poisson_interval = 
get_poisson_interval([yields[category_name][key]]) yields_err[category_name][key] = {"errlo": poisson_interval["lo"][0], diff --git a/quickstats/clis/__init__.py b/quickstats/clis/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f4b14a71589da4d55ad71473bee7743beaf284eb 100644 --- a/quickstats/clis/__init__.py +++ b/quickstats/clis/__init__.py @@ -0,0 +1,8 @@ +from .core import * +from .workspace_tools import * +from .limit_setting import * +from .likelihood_tools import * +from .nuisance_parameter_tools import * +from .stat_tools import * +from .inspect_rfile import * +from .processor_tools import * \ No newline at end of file diff --git a/quickstats/clis/core.py b/quickstats/clis/core.py index 7854455e63a46efd3fc950ee0e38b559d06b2889..1eba744db508f37dc37354f0790b7e4aba95cb1a 100644 --- a/quickstats/clis/core.py +++ b/quickstats/clis/core.py @@ -2,6 +2,30 @@ import os import json import click +__all__ = ['compile_macros', 'add_macro', 'remove_macro', 'cli'] + +class NaturalOrderGroup(click.Group): + """Command group that lists subcommands in the order they were added. + + Make sure you initialize `self.commands` with an OrderedDict instance. + + With decorator, use:: + + @click.group(cls=NaturalOrderGroup, commands=OrderedDict()) + """ + + def list_commands(self, ctx): + """List command names in the order they appear in the commands dict. + + If the dict is an OrderedDict, it preserves the order in which + commands were added. + """ + return self.commands.keys() + +@click.group(cls=NaturalOrderGroup) +def cli(): + pass + class DelimitedStr(click.Option): def type_cast_value(self, ctx, value): try: @@ -9,281 +33,8 @@ except: raise click.BadParameter(value) -@click.command(name='run_pulls') -@click.option('-i', '--input_file', 'filename', required=True, - help='Path to the input workspace file') -@click.option('-x', '--poi', 'poi_name', default=None, - help='POI to measure NP impact on') -@click.option('-o', '--outdir', default="pulls", show_default=True, - help='Output directory') -@click.option('-w', '--workspace', 'ws_name', default=None, - help='Name of workspace. Auto-detect by default.') -@click.option('-m', '--model_config', 'mc_name', default=None, - help='Name of model config. Auto-detect by default.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of dataset') -@click.option('--filter', 'filter_expr', default=None, show_default=True, - help='Filter nuisance parameter(s) to run pulls and impacts on.'+\ - 'Multiple parameters are separated by commas.'+\ - 'Wildcards are accepted. 
All NPs are included by default.') -@click.option('-r', '--profile', 'profile_param', default=None, show_default=True, - help='Parameters to profile') -@click.option('-f', '--fix', 'fix_param', default=None, show_default=True, - help='Parameters to fix') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot') -@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') -@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') -@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True, - help='Binned likelihood') -@click.option('-q', '--precision', type=float, default=0.001, show_default=True, - help='Precision for scan') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') -@click.option('--retry', type=int, default=2, show_default=True, - help='Maximum number of retries upon a failed fit') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default strategy') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-multi', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood') -@click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations') -@click.option('--batch_mode/--no-batch', default=False, show_default=True, - help='Batch mode when evaluating likelihood') -@click.option('--int_bin_precision', type=float, default=-1., show_default=True, - help='Integrate the PDF over the bins instead of using the probability ' - 'density at the bin centre') -@click.option('--parallel', type=int, default=-1, show_default=True, - help='\b\n Parallelize job across the N workers.' - '\b\n Case 0: Jobs are run sequentially (for debugging).' - '\b\n Case -1: Jobs are run across N_CPU workers.') -@click.option('--cache/--no-cache', default=True, show_default=True, - help='Cache existing result') -@click.option('--exclude', 'exclude_expr', default=None, show_default=True, - help='Exclude NPs to run pulls and impacts on. 
'+\ - 'Multiple parameters are separated by commas.'+\ - 'Wildcards are accepted.') -@click.option('--save_log/--skip_log', default=True, show_default=True, - help='Save log file.') -@click.option('--constrained_only/--any_nuis', default=True, show_default=True, - help='Whether to include constrained nuisance parameters only') -@click.option('--version', type=click.Choice(['1', '2']), default='2', show_default=True, - help='Version of tool to use (Choose between 1 and 2).') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -def run_pulls(**kwargs): - """ - Tool for computing NP pulls and impacts - """ - version = kwargs.pop('version') - if version == '1': - from quickstats.components import NuisanceParameterPull - NuisanceParameterPull().run_pulls(**kwargs) - elif version == '2': - from quickstats.concurrent import NuisanceParameterRankingRunner - init_kwargs = {} - for key in ['filename', 'filter_expr', 'exclude_expr', 'poi_name', - 'data_name', 'cache', 'outdir', 'constrained_only', - 'save_log', 'parallel', 'verbosity']: - init_kwargs[key] = kwargs.pop(key) - init_kwargs['config'] = kwargs - runner = NuisanceParameterRankingRunner(**init_kwargs) - runner.run() - -@click.command(name='plot_pulls') -@click.option('-i', '--inputdir', required=True, help='Path to directory containing pull results') -@click.option('-p', '--poi', default=None, help='Parameter of interest for plotting impact') -@click.option('-n', '--n_rank', type=int, default=None, help='Total number of NP to rank') -@click.option('-m', '--rank_per_plot', type=int, default=20, show_default=True, - help='Number of NP to show in a single plot') -@click.option('--ranking/--no_ranking', default=True, show_default=True, - help='Rank NP by impact') -@click.option('--threshold', type=float, default=0., show_default=True, - help='Filter NP by postfit impact threshold') -@click.option('--show_sigma/--hide_sigma', default=True, show_default=True, - help='Show one standard deviation pull') -@click.option('--show_prefit/--hide_prefit', default=True, show_default=True, - help='Show prefit impact') -@click.option('--show_postfit/--hide_postfit', default=True, show_default=True, - help='Show postfit impact') -@click.option('--sigma_bands/--no_sigma_bands', default=False, show_default=True, - help='Draw +-1, +-2 sigma bands') -@click.option('--sigma_lines/--no_sigma_lines', default=True, show_default=True, - help='Draw +-1 sigma lines') -@click.option('--ranking_label/--no_ranking_label', default=True, show_default=True, - help='Show ranking labels') -@click.option('--shade/--no_shade', default=True, show_default=True, - help='Draw shade') -@click.option('--correlation/--no_correlation', default=True, show_default=True, - help='Show correlation impact') -@click.option('--onesided/--overlap', default=True, show_default=True, - help='Show onesided impact') -@click.option('--relative/--absolute', default=False, show_default=True, - help='Show relative variation') -@click.option('--theta_max', type=float, default=2, show_default=True, - help='Pull range') -@click.option('-y', '--padding', type=int, default=7, show_default=True, - help='Padding below plot for texts and legends. 
NP column height is 1 unit.') -@click.option('-h', '--height', type=float, default=1.0, show_default=True, - help='NP column height') -@click.option('-s', '--spacing', type=float, default=0., show_default=True, - help='Spacing between impact box') -@click.option('--label-fontsize', type=float, default=20., show_default=True, - help='Fontsize of analysis label text') -@click.option('-d', '--display_poi', default=r"$\mu$", show_default=True, - help='POI name to be shown in the plot') -@click.option('-t', '--extra_text', default=None, help='Extra texts below the ATLAS label. '+\ - 'Use "//" as newline delimiter') -@click.option('--elumi_label/--no_elumi_label', default=True, show_default=True, - help='Show energy and luminosity labels') -@click.option('--ranking_label/--no_ranking_label', default=True, show_default=True, - help='Show ranking label') -@click.option('--energy', default="13 TeV", show_default=True, - help='Beam energy') -@click.option('--lumi', default="140 fb$^{-1}$", show_default=True, - help='Luminosity') -@click.option('--status', default="int", show_default=True, - help='\b\n Analysis status. Choose from' - '\b\n int : Internal' - '\b\n wip : Work in Progress' - '\b\n prelim : Preliminary' - '\b\n final : *no status label*' - '\b\n *custom input* : *custom input*') -@click.option('--combine_pdf/--split_pdf', default=True, show_default=True, - help='Combine all ranking plots into a single pdf') -@click.option('--outdir', default='ranking_plots', show_default=True, - help='Output directory') -@click.option('-o', '--outname', default='ranking', show_default=True, - help='Output file name prefix') -@click.option('--style', default='default', show_default=True, - help='Plotting style. Built-in styles are "default" and "trex".'+\ - 'Specify path to yaml file to set custom plotting style.') -@click.option('--fix_axis_scale/--free_axis_scale', default=True, show_default=True, - help='Fix the axis scale across all ranking plots') -@click.option('--version', type=click.Choice(['1', '2']), default='2', show_default=True, - help='Version of tool to use (Choose between 1 and 2).') -def plot_pulls(**kwargs): - """ - Tool for plotting NP pulls and impact rankings - """ - from quickstats.plots.np_ranking_plot import NPRankingPlot - inputdir, poi = kwargs.pop('inputdir'), kwargs.pop('poi') - version = kwargs.pop('version') - ranking_plot = NPRankingPlot(inputdir, poi, version=version) - ranking_plot.plot(**kwargs) - -@click.command(name='cls_limit') -@click.option('-i', '--input_file', 'filename', required=True, - help='Path to the input workspace file') -@click.option('-p', '--poi', 'poi_name', default=None, show_default=True, - help='POI to scan. 
If not specified, the first POI from the workspace is used.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of dataset') -@click.option('--asimov_data_name', 'asimov_data_name', default=None, - help='If given, use custom background asimov dataset instead of generating on the fly.') -@click.option('-o', '--outname', default='limits.json', show_default=True, - help='Name of output') -@click.option('--mu_exp', type=float, default=0, show_default=True, - help='Expected signal strengh value to be used for Asimov generation') -@click.option('--blind/--unblind', 'do_blind', default=True, show_default=True, - help='Blind/unblind analysis') -@click.option('--CL', 'CL', type=float, default=0.95, show_default=True, - help='CL value to use') -@click.option('--precision', default=0.005, show_default=True, - help='precision in mu that defines iterative cutoff') -@click.option('--adjust_fit_range/--keep_fit_range', default=True, show_default=True, - help='whether to adjust the fit range to median limit +- 5 sigma for observed fit') -@click.option('--do_tilde/--no_tilde', default=True, show_default=True, - help='bound mu at zero if true and do the \tilde{q}_{mu} asymptotics') -@click.option('--predictive_fit/--no_predictive_fit', default=False, show_default=True, - help='extrapolate best fit nuisance parameters based on previous fit results') -@click.option('--do_better_bands/--skip_better_bands', default=True, show_default=True, - help='evaluate asymptotic CLs limit for various sigma bands') -@click.option('--better_negative_bands/--skip_better_negative_bands', default=False, show_default=True, - help='evaluate asymptotic CLs limit for negative sigma bands') -@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True, - help='Binned likelihood') -@click.option('--save_summary/--skip_summary', default=True, show_default=True, - help='Save summary information') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile') -@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, - help='Name of workspace. Auto-detect by default.') -@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, - help='Name of model config. 
Auto-detect by default.') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot') -@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') -@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') -@click.option('--retry', type=int, default=2, show_default=True, - help='Maximum number of retries upon a failed fit') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--timer/--no_timer', default=False, show_default=True, - help='Enable minimizer timer') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') -@click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood') -@click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms') -@click.option('--improve', type=int, default=0, show_default=True, - help='Execute improve after each minimization') -@click.option('--minimizer_offset', type=int, default=1, show_default=True, - help='Enable minimizer offsetting') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations') -@click.option('--batch_mode/--no-batch', default=False, show_default=True, - help='Batch mode when evaluating likelihood') -@click.option('--int_bin_precision', type=float, default=-1., show_default=True, - help='Integrate the PDF over the bins instead of using the probability ' - 'density at the bin centre') -@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True, - help='Use constrained NLL') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -def cls_limit(**kwargs): - """ - Tool for evaluating Asymptotic CLs limit - """ - from quickstats.components import AsymptoticCLs - outname = kwargs.pop('outname') - save_summary = kwargs.pop('save_summary') - asymptotic_cls = AsymptoticCLs(**kwargs) - asymptotic_cls.evaluate_limits() - asymptotic_cls.save(outname, summary=save_summary) -@click.command(name='compile') +@cli.command(name='compile') @click.option('-m', '--macros', default=None, show_default=True, help='Macros to compile (separated by commas). By default all macros are compiled.') def compile_macros(macros): @@ -293,14 +44,14 @@ def compile_macros(macros): import quickstats quickstats.compile_macros(macros) -@click.command(name='add_macro') +@cli.command(name='add_macro') @click.option('-i', '--input_path', 'path', required=True, help='Path to the directory containing the source file for the macro.') -@click.option('-n', '--name', +@click.option('-n', '--name', default=None, help='Name of the macro. 
By default, the name of the input directory is used.') @click.option('-f', '--force', is_flag=True, help='Force overwrite existing files.') -@click.option('--copy-files/--do-not-copy-files', 'copy_files', default=True, +@click.option('--copy-files/--do-not-copy-files', 'copy_files', default=True, show_default=True, help='Whether to copy files from the input directory (required if not already copied).') @click.option('--add-to-workspace-extension/--do-not-add-to-workspace-extension', 'workspace_extension', default=True, show_default=True, @@ -312,7 +63,7 @@ def add_macro(**kwargs): import quickstats quickstats.add_macro(**kwargs) -@click.command(name='remove_macro') +@cli.command(name='remove_macro') @click.option('-n', '--name', required=True, help='Name of the macro.') @click.option('-f', '--force', is_flag=True, @@ -325,298 +76,4 @@ def remove_macro(**kwargs): Remove a ROOT macro from the module """ import quickstats - quickstats.remove_macro(**kwargs) - -@click.command(name='harmonize_np') -@click.argument('ws_files', nargs=-1) -@click.option('-r', '--reference', required=True, help='Path to reference json file containing renaming scheme') -@click.option('-i', '--input_config_path', default=None, show_default=True, - help='Path to json file containing input workspace paths') -@click.option('-b', '--base_path', default='./', show_default=True, - help='Base path for input config') -@click.option('-o', '--outfile', default='renamed_np.json', show_default=True, - help='Output filename') -def harmonize_np(ws_files, reference, input_config_path, base_path, outfile): - """ - Harmonize NP names across different workspaces - """ - from quickstats.components import NuisanceParameterHarmonizer - harmonizer = NuisanceParameterHarmonizer(reference) - if (len(ws_files) > 0) and input_config_path is not None: - raise RuntimeError('either workspace paths or json file containing workspace paths should be given') - if len(ws_files) > 0: - harmonizer.harmonize(ws_files, outfile=outfile) - elif (input_config_path is not None): - harmonizer.harmonize_multi_input(input_config_path, base_path, outfile=outfile) - - -@click.command(name='generate_asimov') -@click.option('-i', '--input_file', 'filename', required=True, - help='Path to the input workspace file.') -@click.option('-o', '--output_file', 'outname', required=True, - help='Name of the output workspace containing the ' - 'generated asimov dataset.') -@click.option('-p', '--poi', required=True, - help='Name of the parameter of interest (POI).') -@click.option('--poi_val', type=float, default=None, show_default=True, - help='Generate asimov data with POI set at the specified value. ' - 'If None, POI will be kept at the post-fit value if a fitting ' - 'is performed or the pre-fit value if no fitting is performed.') -@click.option('--poi_profile', type=float, default=None, show_default=True, - help='Perform nuisance parameter profiling with POI set at the specified value. ' - 'This option is only effective if do_fit is set to True. If None, POI is ' - 'set floating (i.e. 
unconditional maximum likelihood estimate).') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot') -@click.option('--modify-globs/--keep-globs', default=True, show_default=True, - help='Match the values of nuisance parameters and the corresponding global ' - 'observables when generating the asimov data. This is important for making ' - 'sure the asimov data has the (conditional) minimal NLL.') -@click.option('--do-fit/--no-fit', default=True, show_default=True, - help='Perform nuisance parameter profiling with a fit to the given dataset.') -@click.option('--asimov_name', default="asimovData_{mu}", show_default=True, - help='Name of the generated asimov dataset.') -@click.option('--asimov_snapshot', default="asimovData_{mu}", show_default=True, - help='Name of the snapshot that generates the asimov dataset.') -@click.option('-d', '--data', default='combData', show_default=True, - help='Name of the dataset used in NP profiling.') -@click.option('--constraint_option', default=0, show_default=True, - help='\b\n Customize the target of nuisance paramaters involved in the profiling.' - '\b\n Case 0: All nuisance parameters are allowed to float;' - '\b\n Case 1: Constrained nuisance parameters are fixed to 0.' - '\b\n Unconstrained nuisrance parameters are allowed to float.') -@click.option('-c', '--configuration', default=None, - help='Path to the json configuration file containing' - ' the minimizer options for NP profiling.') -@click.option('--rebuild/--do-not-rebuild', default=False, show_default=True, - help='Rebuild the workspace.') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -def generate_asimov(**kwargs): - """ - Generate Asimov dataset - """ - from quickstats.components import AnalysisBase - filename = kwargs.pop('filename') - data_name = kwargs.pop('data') - poi_name = kwargs.pop('poi') - outname = kwargs.pop('outname') - verbosity = kwargs.pop('verbosity') - fix_param = kwargs.pop('fix_param') - profile_param = kwargs.pop('profile_param') - snapshot_name = kwargs.pop('snapshot_name') - config_file = kwargs.pop('configuration') - rebuild = kwargs.pop('rebuild') - if config_file is not None: - config = json.load(open(config_file, 'r')) - else: - config = {} - config['fix_param'] = fix_param - config['profile_param'] = profile_param - config['snapshot_name'] = snapshot_name - asimov_config = { - "poi_val" : kwargs.pop("poi_val"), - "poi_profile" : kwargs.pop("poi_profile"), - "do_fit" : kwargs.pop("do_fit"), - "modify_globs" : kwargs.pop("modify_globs"), - "do_import" : True, - "asimov_name" : kwargs.pop("asimov_name"), - "asimov_snapshot" : kwargs.pop("asimov_snapshot"), - "constraint_option" : kwargs.pop("constraint_option"), - "restore_states" : 0 - } - analysis = AnalysisBase(filename, poi_name=poi_name, data_name=data_name, - config=config, verbosity=verbosity) - analysis.generate_asimov(**asimov_config) - analysis.save(outname, rebuild=rebuild) - -@click.command(name='generate_standard_asimov') -@click.option('-i', '--input_file', 'filename', required=True, - help='Path to the input workspace file.') -@click.option('-o', '--output_file', 'outname', required=True, - 
help='Name of the output workspace containing the ' - 'generated asimov dataset.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of the dataset used in NP profiling.') -@click.option('-p', '--poi', 'poi_name', required=True, - help='Name of the parameter of interest (POI). Multiple POIs are separated by commas.') -@click.option('-s', '--poi_scale', type=float, default=1.0, show_default=True, - help='Scale factor applied to the poi value') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot') -@click.option('--rebuild/--do-not-rebuild', default=False, show_default=True, - help='Rebuild the workspace.') -@click.option('--asimov_names', default=None, show_default=True, - help='Names of the output asimov datasets (separated by commas). If not specified, ' - 'a default name for the corresponding asimov type will be given.') -@click.option('--asimov_snapshots', default=None, show_default=True, - help='Names of the output asimov snapshots (separated by commas). If not specified, ' - 'a default name for the corresponding asimov type will be given.') -@click.option('-t', '--asimov_types', default="0,1,2", show_default=True, - help='\b\n Types of asimov dataset to generate separated by commas.' - '\b\n 0: fit with POI fixed to 0' - '\b\n 1: fit with POI fixed to 1' - '\b\n 2: fit with POI free and set POI to 1 after fit' - '\b\n 3: fit with POI and constrained NP fixed to 0' - '\b\n 4: fit with POI fixed to 1 and constrained NP fixed to 0' - '\b\n 5: fit with POI free and constrained NP fixed to 0 and set POI to 1 after fit' - '\b\n -1: nominal NP with POI set to 0' - '\b\n -2: nominal NP with POI set to 1') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -def generate_standard_asimov(**kwargs): - """ - Generate standard Asimov dataset - """ - from quickstats.components import AsimovGenerator - from quickstats.utils.string_utils import split_str - outname = kwargs.pop('outname') - asimov_types = kwargs.pop('asimov_types') - try: - asimov_types = split_str(asimov_types, sep=",", cast=int) - except: - asimov_types = split_str(asimov_types, sep=",") - fix_param = kwargs.pop('fix_param') - profile_param = kwargs.pop('profile_param') - snapshot_name = kwargs.pop('snapshot_name') - poi_scale = kwargs.pop("poi_scale") - asimov_names = kwargs.pop("asimov_names") - asimov_snapshots = kwargs.pop("asimov_snapshots") - verbosity = kwargs.pop("verbosity") - rebuild = kwargs.pop("rebuild") - kwargs['poi_name'] = split_str(kwargs.pop('poi_name'), sep=",") - config = { - 'fix_param': fix_param, - 'profile_param': profile_param, - 'snapshot_name': snapshot_name - } - from quickstats.utils.string_utils import split_str - if asimov_names is not None: - asimov_names = split_str(asimov_names, sep=",") - if asimov_snapshots is not None: - asimov_snapshots = split_str(asimov_snapshots, sep=",") - generator = AsimovGenerator(**kwargs, config=config, verbosity=verbosity) - generator.generate_standard_asimov(asimov_types, poi_scale=poi_scale, - asimov_names=asimov_names, - asimov_snapshots=asimov_snapshots) - generator.save(outname, rebuild=rebuild) - 
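# [Editor's sketch, not part of the patch] The removed generate_standard_asimov
# body above parses the comma-delimited --asimov_types string with
# quickstats.utils.string_utils.split_str, first with cast=int and falling back
# to plain strings when the cast fails. A minimal self-contained equivalent of
# that parsing step, assuming split_str behaves like str.split with an
# element-wise cast that raises on failure:
def parse_asimov_types(expr: str):
    """Parse "0,1,2" -> [0, 1, 2]; fall back to raw tokens if casting fails."""
    tokens = [token.strip() for token in expr.split(",") if token.strip()]
    try:
        return [int(token) for token in tokens]  # numeric asimov type codes
    except ValueError:
        return tokens  # named asimov types are kept as strings

# e.g. parse_asimov_types("0,1,-2") == [0, 1, -2]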
-@click.command(name='toy_significance') -@click.option('-i', '--input_file', 'filename', required=True, - help='Path to the input workspace file.') -@click.option('-o', '--output_file', 'outname', default="toy_study/results.json", - help='Name of the output file containing toy results.') -@click.option('-n', '--n_toys', type=int, - help='Number of the toys to use.') -@click.option('-b', '--batchsize', type=int, default=100, show_default=True, - help='Divide the task into batches each containing this number of toys. ' - 'Result from each batch is saved for caching and different batches ' - 'are run in parallel if needed') -@click.option('-s', '--seed', type=int, default=0, show_default=True, - help='Random seed used for generating toy datasets.') -@click.option('-p', '--poi', 'poi_name', default=None, - help='Name of the parameter of interest (POI). If None, the first POI is used.') -@click.option('-v', '--poi_val', type=float, default=0, show_default=True, - help='POI value when generating the toy dataset.') -@click.option('--binned/--unbinned', default=True, show_default=True, - help='Generate binned toy dataset.') -@click.option('--cache/--no-cache', default=True, show_default=True, - help='Cache existing batch results.') -@click.option('--fit_options', default=None, help='A json file specifying the fit options.') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -@click.option('--parallel', type=int, default=-1, show_default=True, - help='\b\n Parallelize job across the N workers.' - '\b\n Case 0: Jobs are run sequentially (for debugging).' - '\b\n Case -1: Jobs are run across N_CPU workers.') -def toy_significance(**kwargs): - """ - Generate toys and evaluate significance - """ - from quickstats.components import PValueToys - n_toys = kwargs.pop("n_toys") - batchsize = kwargs.pop("batchsize") - seed = kwargs.pop("seed") - cache = kwargs.pop("cache") - outname = kwargs.pop("outname") - parallel = kwargs.pop("parallel") - pvalue_toys = PValueToys(**kwargs) - pvalue_toys.get_toy_results(n_toys=n_toys, batchsize=batchsize, seed=seed, - cache=cache, save_as=outname, parallel=parallel) - - - -@click.command(name='toy_limit') -@click.option('-i', '--input_file', 'filename', required=True, - help='Path to the input workspace file.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of the dataset used for computing observed limit.') -@click.option('-o', '--output_file', 'outname', - default="toy_study/toy_result_seed_{seed}_batch_{batch}.root", - show_default=True, - help='Name of the output file containing toy results.') -@click.option('--poi_max', type=float, default=None, - help='Maximum range of POI.') -@click.option('--poi_min', type=float, default=None, - help='Minimum range of POI.') -@click.option('--scan_max', type=float, default=None, - help='Maximum scan value of POI.') -@click.option('--scan_min', type=float, default=None, - help='Minimum scan value of POI.') -@click.option('--steps', type=int, default=10, show_default=True, - help='Number of scan steps.') -@click.option('--mu_val', type=float, default=None, - help='Value of POI for running a single point') -@click.option('-n', '--n_toys', type=int, - help='Number of the toys to use.') -@click.option('-b', '--batchsize', type=int, default=50, show_default=True, - help='Divide the task into batches each containing this number of toys. 
' - 'Result from each batch is saved for caching and different batches ' - 'are run in parallel if needed') -@click.option('-s', '--seed', type=int, default=2021, show_default=True, - help='Random seed used for generating toy datasets.') -@click.option('-t', '--tolerance', type=float, default=1., show_default=True, - help='Tolerance for minimization.') -@click.option('-p', '--poi', 'poi_name', default=None, - help='Name of the parameter of interest (POI). If None, the first POI is used.') -@click.option('--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--offset/--no-offset', default=True, show_default=True, - help='Use NLL offset.') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile') -@click.option('--snapshot', 'snapshot_name', default=None, help='Name of initial snapshot') -@click.option('--parallel', type=int, default=-1, show_default=True, - help='\b\n Parallelize job across the N workers.' - '\b\n Case 0: Jobs are run sequentially (for debugging).' - '\b\n Case -1: Jobs are run across N_CPU workers.') -def toy_limit(**kwargs): - """ - Generate toys and evaluate limits - """ - from quickstats.components.toy_limit_calculator import evaluate_batched_toy_limits - if not (((kwargs['scan_min'] is None) and (kwargs['scan_max'] is None) and (kwargs['mu_val'] is not None)) or \ - ((kwargs['scan_min'] is not None) and (kwargs['scan_max'] is not None) and (kwargs['mu_val'] is None))): - raise ValueError("please provide either (scan_min, scan_max, steps) for running a scan or (mu_val)" - " for running a single point") - evaluate_batched_toy_limits(**kwargs) \ No newline at end of file + quickstats.remove_macro(**kwargs) \ No newline at end of file diff --git a/quickstats/clis/inspect_rfile.py b/quickstats/clis/inspect_rfile.py index d2a5a93a259ad312cf5b43f90034b3244e12b0dc..81a7813f50ea854947dbefe4f77d5ea9888d76a2 100644 --- a/quickstats/clis/inspect_rfile.py +++ b/quickstats/clis/inspect_rfile.py @@ -1,8 +1,11 @@ import click - import fnmatch + +from .core import cli + +__all__ = ['inspect_rfile'] -@click.command(name='inspect_rfile') +@cli.command(name='inspect_rfile') @click.option('-i', '--file_expr', required=True, help='File name expression ' '(supports wild card, multiple files separated by commas).') @click.option('-t', '--tree_name', required=True, help='Tree name.') diff --git a/quickstats/clis/inspect_ws.py b/quickstats/clis/inspect_ws.py deleted file mode 100644 index e977195f49bb07fc5bc4847a80972f75b11e3bde..0000000000000000000000000000000000000000 --- a/quickstats/clis/inspect_ws.py +++ /dev/null @@ -1,41 +0,0 @@ -import click - -import fnmatch - -from .core import DelimitedStr - -kItemChoices = ['workspace', 'dataset', 'snapshot', 'category', 'poi', - 'detailed_nuisance_parameter', 'nuisance_parameter', - 'global_observable', 'auxiliary'] -kDefaultItems = ",".join(['workspace', 'dataset', 'snapshot', 'category', - 'poi', 'detailed_nuisance_parameter']) - 
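# [Editor's sketch, not part of the patch] The deleted inspect_ws command below
# feeds its --items option through the DelimitedStr option class from core.py;
# the body of DelimitedStr.type_cast_value is elided by a hunk boundary earlier
# in this diff, so the following is a plausible, hypothetical reconstruction
# (an assumption, not the repository's actual code) of what such an option
# class does: split a comma-delimited string and validate each token against
# the declared type.
import click

class DelimitedStrSketch(click.Option):
    def type_cast_value(self, ctx, value):
        if value is None:
            return None
        try:
            # validate each comma-separated token against e.g. a click.Choice
            return [self.type.convert(token, self, ctx) for token in value.split(",")]
        except Exception:
            raise click.BadParameter(value)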
-@click.command(name='inspect_ws') -@click.option('-i', '--input_file', required=True, help='Path to the input workspace file') -@click.option('-w', '--workspace', 'ws_name', default=None, help='Name of workspace. Auto-detect by default.') -@click.option('-d', '--dataset', 'data_name', default=None, help='Name of dataset. Generally not needed.') -@click.option('-m', '--model_config', 'mc_name', default=None, help='Name of model config. Auto-detect by default.') -@click.option('-o', '--output_file', default=None, help='Export output to text file. If None, no output is saved.') -@click.option('--items', cls=DelimitedStr, type=click.Choice(kItemChoices), show_default=True, - default=kDefaultItems, help='Items to include in the summary (separated by commas).') -@click.option('--include', 'include_patterns', default=None, - help='Match variable names with given patterns (separated by commas).') -@click.option('--exclude', 'exclude_patterns', default=None, - help='Exclude variable names with given patterns (separated by commas).') -@click.option('--detailed/--name-only', default=True, show_default=True, - help='Include detailed variable properties or just the variable name in the summary.') -def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_file=None, items=None, - include_patterns=None, exclude_patterns=None, detailed=True): - ''' - Inspect workspace attributes - ''' - from quickstats.components import ExtendedModel - model = ExtendedModel(input_file, ws_name=ws_name, mc_name=mc_name, data_name=data_name, - verbosity="WARNING") - from quickstats.utils.string_utils import split_str - #items = items.split(",") if items is not None else None - include_patterns = split_str(include_patterns, ',') if include_patterns is not None else None - exclude_patterns = split_str(exclude_patterns, ',') if exclude_patterns is not None else None - model.stdout.verbosity = "INFO" - model.print_summary(items=items, save_as=output_file, detailed=detailed, - include_patterns=include_patterns, exclude_patterns=exclude_patterns) \ No newline at end of file diff --git a/quickstats/clis/likelihood_fit.py b/quickstats/clis/likelihood_fit.py deleted file mode 100644 index ed3043d8d18b755f34e23632e84dc69c24536cbb..0000000000000000000000000000000000000000 --- a/quickstats/clis/likelihood_fit.py +++ /dev/null @@ -1,296 +0,0 @@ -import os -import json -import click - -@click.command(name='likelihood_fit') -@click.option('-i', '--input_file', "filename", required=True, - help='Path to the input workspace file.') -@click.option('-o', '--outname', default='fit_result.json', show_default=True, - help='Name of output file.') -@click.option('--display/--no-display', default=True, show_default=True, - help='Display fit result.') -@click.option('--save/--no-save', "save_result", default=False, show_default=True, - help='Save fit result.') -@click.option('--save_log/--skip_log', default=False, show_default=True, - help='Save log file.') -@click.option('--save_ws', default=None, show_default=True, - help='Save fitted workspace to a given path.') -@click.option('--save_snapshot', default=None, show_default=True, - help='Save fitted values of all variables as a snapshot and restore all variables to ' - 'their initial values. Should be used together with --save_ws.') -@click.option('--rebuild/--no-rebuild', default=True, show_default=True, - help='Save fitted workspace by rebuilding it. 
Should be used together with --save_ws.') -@click.option('--outdir', default="pulls", show_default=True, - help='Output directory for pulls output.') -@click.option('--export_as_np_pulls/--skip_export_as_np_pulls', default=False, show_default=True, - help='Export (constrained) NP results for pulls plot.') -@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, - help='Name of workspace. Auto-detect by default.') -@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, - help='Name of model config. Auto-detect by default.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of dataset.') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix') -@click.option('--pois', default="", show_default=True, - help='Define the set of POIs (separated by commas) set for calculating Minos errors.') -@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True, - help='Use constrained NLL (i.e. include systematics)') -@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') -@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') -@click.option('--retry', type=int, default=0, show_default=True, - help='Maximum number of retries upon a failed fit') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--hesse/--no-hesse', default=False, show_default=True, - help='Evaluate errors using Hesse.') -@click.option('--improve', type=int, default=0, show_default=True, - help='Execute improve after each minimization') -@click.option('--minimizer_offset', type=int, default=1, show_default=True, - help='Enable minimizer offsetting') -@click.option('--minos/--no-minos', default=False, show_default=True, - help='Evaluate errors using Minos.') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations') -@click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms') -@click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood') -@click.option('--batch_mode/--no-batch', default=False, show_default=True, - help='Batch mode when evaluating likelihood') -@click.option('--int_bin_precision', type=float, default=-1., show_default=True, - help='Integrate the PDF over the bins instead of using the probability ' - 'density at the bin centre') -@click.option('-v', '--verbosity', 
default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -def likelihood_fit(**kwargs): - """ - Perform likelihood fit on a workspace - """ - do_minos = kwargs.pop("minos") - rebuild = kwargs.pop("rebuild") - from quickstats.utils.string_utils import split_str - pois = split_str(kwargs.pop("pois"), sep=',', remove_empty=True) - _kwargs = {} - for arg_name in ["outname", "save_log", "display", "save_result", - "export_as_np_pulls", "outdir", "save_ws", "save_snapshot"]: - _kwargs[arg_name] = kwargs.pop(arg_name) - _init_kwargs = {} - for arg_name in ["filename", "data_name", "verbosity"]: - _init_kwargs[arg_name] = kwargs.pop(arg_name) - _init_kwargs['config'] = kwargs - _init_kwargs['poi_name'] = pois - from quickstats.components import AnalysisBase - if _kwargs['save_log']: - from quickstats.concurrent.logging import standard_log - log_path = os.path.splitext(_kwargs["outname"])[0] + ".log" - with standard_log(log_path) as logger: - analysis = AnalysisBase(**_init_kwargs) - if _kwargs['export_as_np_pulls']: - analysis.minimizer.configure(hesse=True) - fit_result = analysis.nll_fit(mode=3, do_minos=do_minos) - print(f"INFO: Saved fit log to `{log_path}`") - else: - analysis = AnalysisBase(**_init_kwargs) - fit_result = analysis.nll_fit(mode=3, do_minos=do_minos) - output = {} - output['fit_result'] = fit_result - df = {'pois':{}, 'nuisance_parameters':{}} - analysis.load_snapshot("currentSnapshot") - df['pois']['prefit'] = analysis.model.as_dataframe('poi') - df['nuisance_parameters']['prefit'] = analysis.model.as_dataframe('nuisance_parameter') - analysis.load_snapshot("nllFit") - if do_minos: - df['pois']['postfit'] = analysis.model.as_dataframe('poi', asym_error=True) - else: - df['pois']['postfit'] = analysis.model.as_dataframe('poi') - df['nuisance_parameters']['postfit'] = analysis.model.as_dataframe('nuisance_parameter') - if _kwargs['display']: - import pandas as pd - pd.set_option('display.max_rows', None) - for key in ['pois', 'nuisance_parameters']: - df[key]['combined'] = df[key]['prefit'].drop(["value", "error"], axis=1) - df[key]['combined']['value_prefit'] = df[key]['prefit']['value'] - df[key]['combined']['value_postfit'] = df[key]['postfit']['value'] - df[key]['combined']['error_prefit'] = df[key]['prefit']['error'] - if (key == "pois") and do_minos: - df[key]['combined']['errorlo_postfit'] = df[key]['postfit']['errorlo'] - df[key]['combined']['errorhi_postfit'] = df[key]['postfit']['errorhi'] - else: - df[key]['combined']['error_postfit'] = df[key]['postfit']['error'] - output[key] = df[key]['combined'].to_dict("list") - if _kwargs['display']: - print("{}:".format(key.title())) - print(df[key]['combined']) - print() - if _kwargs['save_result']: - import json - with open(_kwargs["outname"], "w") as f: - json.dump(output, f, indent=2) - print(f"INFO: Saved fit result to `{_kwargs['outname']}`") - if _kwargs['export_as_np_pulls']: - outdir = _kwargs['outdir'] - if not os.path.exists(outdir): - os.makedirs(outdir) - nuis_df = df[key]['combined'].drop(['min', 'max', 'is_constant', 'error_prefit'], axis=1) - nuis_df = nuis_df.rename(columns={"value_prefit":"nuis_nom", "name":"nuisance", - "value_postfit":"nuis_hat", "error_postfit":"nuis_hi"}) - nuis_df["nuis_lo"] = nuis_df["nuis_hi"] - nuis_df["nuis_prefit"] = 1.0 - nuis_df = nuis_df.set_index(['nuisance']) - constrained_np = [i.GetName() for i in analysis.model.get_constrained_nuisance_parameters()] - nuis_df = 
nuis_df.loc[constrained_np].reset_index() - nuis_data = nuis_df.to_dict('index') - import json - for i in nuis_data: - data = nuis_data[i] - np_name = data['nuisance'] - outpath = os.path.join(outdir, f"{np_name}.json") - with open(outpath, "w") as outfile: - json.dump({"nuis": data}, outfile, indent=2) - if _kwargs["save_ws"] is not None: - filename = _kwargs["save_ws"] - if _kwargs["save_snapshot"] is not None: - snapshot_name = _kwargs["save_snapshot"] - from quickstats.components.basics import WSArgument - analysis.save_snapshot(snapshot_name, WSArgument.MUTABLE) - analysis.load_snapshot(analysis.kInitialSnapshotName) - analysis.save(filename, rebuild=rebuild) - -@click.command(name='np_correlation') -@click.option('-i', '--input_file', "filename", required=True, - help='Path to the input workspace file.') -@click.option('-o', '--basename', default='NP_correlation_matrix', show_default=True, - help='Base name of the output.') -@click.option('--select', default=None, show_default=True, - help='Select specific NPs to be stored in the final output (for json and plot only). ' - 'Use comma to separate the selection (wild card is supported).') -@click.option('--remove', default=None, show_default=True, - help='Select specific NPs to be removed in the final output (for json and plot only). ' - 'Use comma to separate the selection (wild card is supported).') -@click.option('--save_plot/--no_save_plot', default=True, show_default=True, - help='Save NP correlation matrix as a plot in pdf format') -@click.option('--save_json/--no_save_json', default=False, show_default=True, - help='Save NP correlation matrix as a json file') -@click.option('--save_root/--no_save_root', default=False, show_default=True, - help='Save NP correlation matrix as a 2D histogram in a root file') -@click.option('--plot_style', default="default", show_default=True, - help='Plot style if save_plot is enabled. Choose between \"default\" and ' - f'\"viridis\". Alternatively, a path to a yaml config file can be used') -@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, - help='Name of workspace. Auto-detect by default.') -@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, - help='Name of model config. Auto-detect by default.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of dataset.') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix') -@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True, - help='Use constrained NLL (i.e. 
include systematics)') -@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') -@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations') -@click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms') -@click.option('--improve', type=int, default=0, show_default=True, - help='Execute improve after each minimization') -@click.option('--minimizer_offset', type=int, default=1, show_default=True, - help='Enable minimizer offsetting') -@click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood') -@click.option('--batch_mode/--no-batch', default=False, show_default=True, - help='Batch mode when evaluating likelihood') -@click.option('--int_bin_precision', type=float, default=-1., show_default=True, - help='Integrate the PDF over the bins instead of using the probability ' - 'density at the bin centre') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -def np_correlation(**kwargs): - """ - Evaluate post-fit NP correlation matrix - """ - _kwargs = {} - for arg_name in ["basename", "save_plot", "save_json", "save_root", - "plot_style", "select", "remove"]: - _kwargs[arg_name] = kwargs.pop(arg_name) - _init_kwargs = {} - for arg_name in ["filename", "data_name", "verbosity"]: - _init_kwargs[arg_name] = kwargs.pop(arg_name) - _init_kwargs['config'] = kwargs - _init_kwargs['poi_name'] = [] - from quickstats.components import AnalysisBase - analysis = AnalysisBase(**_init_kwargs) - analysis.minimizer.configure(hesse=True) - analysis.nll_fit(mode=3) - fit_result = analysis.roofit_result - basename = os.path.splitext(_kwargs['basename'])[0] - from quickstats.utils.roofit_utils import get_correlation_matrix - if _kwargs['save_root']: - correlation_hist = get_correlation_matrix(fit_result, lib="root") - outname = basename + ".root" - correlation_hist.SaveAs(outname) - print(f"INFO: Saved correlation histogram to `{outname}`") - correlation_hist.Delete() - from quickstats.utils.common_utils import filter_by_wildcards - if _kwargs['save_json'] or _kwargs['save_plot']: - df = get_correlation_matrix(fit_result, lib='pandas') - labels = list(df.columns) - selected = filter_by_wildcards(labels, _kwargs['select']) - selected = filter_by_wildcards(selected, _kwargs['remove'], exclusion=True) - to_drop = list(set(labels) - set(selected)) - df = df.drop(to_drop, axis=0).drop(to_drop, axis=1).transpose() - if _kwargs['save_json']: - data = 
df.to_dict() - outname = basename + ".json" - with open(outname, "w") as out: - json.dump(data, out, indent=2) - print(f"INFO: Saved correlation data to `{outname}`") - if _kwargs['save_plot']: - import matplotlib.pyplot as plt - from quickstats.plots import CorrelationPlot - plotter = CorrelationPlot(df) - ax = plotter.draw_style(_kwargs['plot_style']) - outname = basename + ".pdf" - plt.savefig(outname, bbox_inches="tight") - print(f"INFO: Saved correlation plot to `{outname}`") \ No newline at end of file diff --git a/quickstats/clis/likelihood_scan.py b/quickstats/clis/likelihood_scan.py deleted file mode 100644 index cf9909c420c76b3cca1f97cfd07844851d68b266..0000000000000000000000000000000000000000 --- a/quickstats/clis/likelihood_scan.py +++ /dev/null @@ -1,130 +0,0 @@ -import os -import click - -@click.command(name='likelihood_scan') -@click.option('-i', '--input_file', required=True, - help='Path to the input workspace file.') -@click.option('-p', '--param_expr', default=None, - help='\b\n Parameter expression, e.g.' - '\b\n 1D scan: "poi_name=<poi_min>_<poi_max>_<step>"' - '\b\n 2D scan: "poi_1_name=<poi_1_min>_<poi_1_max>_<step_1>,' - '\b\n poi_2_name=<poi_2_min>_<poi_2_max>_<step_2>"') -@click.option('--filter', 'filter_expr', default=None, show_default=True, - help='\b Filter parameter points by expression.\n' - '\b Example: "mass=2*,350,400,450;klambda=1.*,2.*,-1.*,-2.*"\n' - '\b Refer to documentation for more information\n') -@click.option('--exclude', 'exclude_expr', default=None, show_default=True, - help='\b Exclude parameter points by expression.\n' - '\b Example: "mass=2*,350,400,450;klambda=1.*,2.*,-1.*,-2.*"\n' - '\b Refer to documentation for more information\n') -@click.option('--min', 'poi_min', type=float, default=None, - help='(deprecated) Minimum POI value to scan.') -@click.option('--max', 'poi_max', type=float, default=None, - help='(deprecated) Maximum POI value to scan.') -@click.option('--step', 'poi_step', type=float, default=None, - help='(deprecated) Scan interval.') -@click.option('--poi', 'poi_name', default=None, show_default=True, - help='(deprecated) POI to scan. If not specified, the first POI from the workspace is used.') -@click.option('--cache/--no-cache', default=True, show_default=True, - help='Cache existing result.') -@click.option('-o', '--outname', default='{poi_names}.json', show_default=True, - help='Name of output') -@click.option('--outdir', default='likelihood_scan', show_default=True, - help='Output directory.') -@click.option('--cachedir', default='cache', show_default=True, - help='Cache directory relative to the output directory.') -@click.option('--save_log/--skip_log', default=True, show_default=True, - help='Save log file.') -@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, - help='Name of workspace. Auto-detect by default.') -@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, - help='Name of model config. 
Auto-detect by default.') -@click.option('-d', '--data', 'data_name', default='combData', show_default=True, - help='Name of dataset.') -@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, - help='Name of initial snapshot.') -@click.option('--uncond_snapshot', default=None, show_default=True, - help='Name of snapshot with unconditional fit result.') -@click.option('-r', '--profile', 'profile_param', default="", show_default=True, - help='Parameters to profile.') -@click.option('-f', '--fix', 'fix_param', default="", show_default=True, - help='Parameters to fix.') -@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True, - help='Use constrained NLL (i.e. include systematics).') -@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type.') -@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm.') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter.') -@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True, - help='Binned likelihood.') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium.') -@click.option('--retry', type=int, default=0, show_default=True, - help='Maximum number of retries upon a failed fit.') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy.') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level.') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache.') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2.') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls.') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations.') -@click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms.') -@click.option('--improve', type=int, default=0, show_default=True, - help='Execute improve after each minimization') -@click.option('--minimizer_offset', type=int, default=1, show_default=True, - help='Enable minimizer offsetting') -@click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood.') -@click.option('--allow-nan/--not-allow-nan', default=True, show_default=True, - help='Allow cached nll to be nan.') -@click.option('-v', '--verbosity', default='INFO', show_default=True, - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), - help='Verbosity level.') -@click.option('--parallel', type=int, default=-1, show_default=True, - help='\b\n Parallelize job across the N workers.' - '\b\n Case 0: Jobs are run sequentially (for debugging).' 
- '\b\n Case -1: Jobs are run across N_CPU workers.') -def likelihood_scan(**kwargs): - """ - Evaluate a set of parmeterised likelihood values - """ - _kwargs = {} - for arg_name in ["input_file", "param_expr", "data_name", "outdir", "filter_expr", "uncond_snapshot", - "exclude_expr", "outname", "cache", "cachedir", "save_log", "parallel", "verbosity", - "allow_nan"]: - _kwargs[arg_name] = kwargs.pop(arg_name) - _kwargs['config'] = kwargs - - # for backward compatibility - _deprecated_kwargs = {} - for arg_name in ["poi_min", "poi_max", "poi_step", "poi_name"]: - _deprecated_kwargs[arg_name] = kwargs.pop(arg_name) - if all(_deprecated_kwargs[arg_name] is not None for arg_name in ["poi_min", "poi_max", "poi_step"]): - if _kwargs['param_expr'] is not None: - raise RuntimeError("either `param_expr` or (`poi_min`, `poi_max`, `poi_step`) should be " - "given for 1D likelihood scan") - print("WARNING: Likelihood scan using `poi_min`, `poi_max` and `poi_step` is " - "deprecated. Use --param_expr \"<poi_name>=<poi_min>_<poi_max>_<poi_step>\" instead.") - from quickstats.components import AnalysisBase - analysis = AnalysisBase(_kwargs['input_file'], poi_name=_deprecated_kwargs['poi_name'], - data_name=_kwargs['data_name'], verbosity="WARNING") - poi_name = analysis.poi.GetName() - poi_min = _deprecated_kwargs['poi_min'] - poi_max = _deprecated_kwargs['poi_max'] - poi_step = _deprecated_kwargs['poi_step'] - _kwargs['param_expr'] = f"{poi_name}={poi_min}_{poi_max}_{poi_step}" - elif (not all(_deprecated_kwargs[arg_name] is None for arg_name in ["poi_min", "poi_max", "poi_step"])) or \ - (_kwargs['param_expr'] is None): - raise RuntimeError("either `param_expr` or (`poi_min`, `poi_max`, `poi_step`) should be " - "given for 1D likelihood scan") - from quickstats.concurrent import ParameterisedLikelihood - runner = ParameterisedLikelihood(**_kwargs) - runner.run() diff --git a/quickstats/clis/likelihood_tools.py b/quickstats/clis/likelihood_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..39eed9ce5c9079bfb9b3471949cf8bc17abf568b --- /dev/null +++ b/quickstats/clis/likelihood_tools.py @@ -0,0 +1,394 @@ +import os +import json +import click + +from .core import cli + +__all__ = ['likelihood_fit', 'likelihood_scan'] + +@cli.command(name='likelihood_fit') +@click.option('-i', '--input_file', "filename", required=True, + help='Path to the input workspace file.') +@click.option('-o', '--outname', default='fit_result.json', show_default=True, + help='Name of output file.') +@click.option('--display/--no-display', default=True, show_default=True, + help='Display fit result.') +@click.option('--save/--no-save', "save_result", default=False, show_default=True, + help='Save fit result.') +@click.option('--save_log/--skip_log', default=False, show_default=True, + help='Save log file.') +@click.option('--save_ws', default=None, show_default=True, + help='Save fitted workspace to a given path.') +@click.option('--save_snapshot', default=None, show_default=True, + help='Save fitted values of all variables as a snapshot and restore all variables to ' + 'their initial values. Should be used together with --save_ws.') +@click.option('--rebuild/--no-rebuild', default=True, show_default=True, + help='Save fitted workspace by rebuilding it. 
Should be used together with --save_ws.')
+@click.option('--export_as_np_pulls/--skip_export_as_np_pulls', default=False, show_default=True,
+              help='Export (constrained) NP results for pulls plot.')
+@click.option('--outdir', default="pulls", show_default=True,
+              help='Output directory for pulls output.')
+@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
+              help='Name of workspace. Auto-detect by default.')
+@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
+              help='Name of model config. Auto-detect by default.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of dataset.')
+@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True,
+              help='Name of initial snapshot.')
+@click.option('-r', '--profile', 'profile_param', default="", show_default=True,
+              help='Parameters to profile.')
+@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
+              help='Parameters to fix.')
+@click.option('--pois', default="", show_default=True,
+              help='Define the set of POIs (separated by commas) for calculating Minos errors.')
+@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True,
+              help='Use constrained NLL (i.e. include systematics).')
+@click.option('--minos/--no-minos', default=False, show_default=True,
+              help='Evaluate errors using Minos.')
+@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterion.')
+@click.option('--retry', type=int, default=1, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
+@click.option('--optimize', type=int, default=2, show_default=True,
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
+              help='Activate binned likelihood for RooRealSumPdf.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
+@click.option('--batch_mode/--no-batch', default=False, show_default=True,
+              help='Batch mode when evaluating likelihood.')
+@click.option('--int_bin_precision', type=float, default=-1., show_default=True,
+              help='Integrate the PDF over the bins instead of using the probability '
+                   'density at the bin center.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. 
Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') +def likelihood_fit(**kwargs): + """ + Perform likelihood fit on a workspace + """ + do_minos = kwargs.pop("minos") + rebuild = kwargs.pop("rebuild") + from quickstats.utils.string_utils import split_str + pois = split_str(kwargs.pop("pois"), sep=',', remove_empty=True) + _kwargs = {} + for arg_name in ["outname", "save_log", "display", "save_result", + "export_as_np_pulls", "outdir", "save_ws", "save_snapshot"]: + _kwargs[arg_name] = kwargs.pop(arg_name) + _init_kwargs = {} + for arg_name in ["filename", "data_name", "verbosity"]: + _init_kwargs[arg_name] = kwargs.pop(arg_name) + _init_kwargs['config'] = kwargs + _init_kwargs['poi_name'] = pois + from quickstats.components import AnalysisBase + if _kwargs['save_log']: + from quickstats.concurrent.logging import standard_log + log_path = os.path.splitext(_kwargs["outname"])[0] + ".log" + with standard_log(log_path) as logger: + analysis = AnalysisBase(**_init_kwargs) + if _kwargs['export_as_np_pulls']: + analysis.minimizer.configure(hesse=True) + fit_result = analysis.nll_fit(mode=3, do_minos=do_minos) + print(f"INFO: Saved fit log to `{log_path}`") + else: + analysis = AnalysisBase(**_init_kwargs) + fit_result = analysis.nll_fit(mode=3, do_minos=do_minos) + output = {} + output['fit_result'] = fit_result + df = {'pois':{}, 'nuisance_parameters':{}} + analysis.load_snapshot("currentSnapshot") + df['pois']['prefit'] = analysis.model.as_dataframe('poi') + df['nuisance_parameters']['prefit'] = analysis.model.as_dataframe('nuisance_parameter') + analysis.load_snapshot("nllFit") + if do_minos: + df['pois']['postfit'] = analysis.model.as_dataframe('poi', asym_error=True) + else: + df['pois']['postfit'] = analysis.model.as_dataframe('poi') + df['nuisance_parameters']['postfit'] = analysis.model.as_dataframe('nuisance_parameter') + if _kwargs['display']: + import pandas as pd + pd.set_option('display.max_rows', None) + for key in ['pois', 'nuisance_parameters']: + df[key]['combined'] = df[key]['prefit'].drop(["value", "error"], axis=1) + df[key]['combined']['value_prefit'] = df[key]['prefit']['value'] + df[key]['combined']['value_postfit'] = df[key]['postfit']['value'] + df[key]['combined']['error_prefit'] = df[key]['prefit']['error'] + if (key == "pois") and do_minos: + df[key]['combined']['errorlo_postfit'] = df[key]['postfit']['errorlo'] + df[key]['combined']['errorhi_postfit'] = df[key]['postfit']['errorhi'] + else: + df[key]['combined']['error_postfit'] = df[key]['postfit']['error'] + output[key] = df[key]['combined'].to_dict("list") + if _kwargs['display']: + print("{}:".format(key.title())) + print(df[key]['combined']) + print() + if _kwargs['save_result']: + import json + with open(_kwargs["outname"], "w") as f: + json.dump(output, f, indent=2) + print(f"INFO: Saved fit result to `{_kwargs['outname']}`") + if _kwargs['export_as_np_pulls']: + outdir = _kwargs['outdir'] + if not os.path.exists(outdir): + os.makedirs(outdir) + nuis_df = df[key]['combined'].drop(['min', 'max', 'is_constant', 'error_prefit'], axis=1) + nuis_df = nuis_df.rename(columns={"value_prefit":"nuis_nom", "name":"nuisance", + "value_postfit":"nuis_hat", "error_postfit":"nuis_hi"}) + nuis_df["nuis_lo"] = nuis_df["nuis_hi"] + nuis_df["nuis_prefit"] = 1.0 + nuis_df = nuis_df.set_index(['nuisance']) + constrained_np = [i.GetName() for i in 
analysis.model.get_constrained_nuisance_parameters()] + nuis_df = nuis_df.loc[constrained_np].reset_index() + nuis_data = nuis_df.to_dict('index') + import json + for i in nuis_data: + data = nuis_data[i] + np_name = data['nuisance'] + outpath = os.path.join(outdir, f"{np_name}.json") + with open(outpath, "w") as outfile: + json.dump({"nuis": data}, outfile, indent=2) + if _kwargs["save_ws"] is not None: + filename = _kwargs["save_ws"] + if _kwargs["save_snapshot"] is not None: + snapshot_name = _kwargs["save_snapshot"] + from quickstats.components.basics import WSArgument + analysis.save_snapshot(snapshot_name, WSArgument.MUTABLE) + analysis.load_snapshot(analysis.kInitialSnapshotName) + analysis.save(filename, rebuild=rebuild) + +@cli.command(name='likelihood_scan') +@click.option('-i', '--input_path', required=True, + help='Input directory/path containing the workspace file(s) to process.') +@click.option('--file_expr', default=None, show_default=True, + help='\b\n File name expression describing the external parameterisation.' + '\b\n Example: "<mass[F]>_kl_<klambda[P]>"' + '\b\n Regular expression is supported' + '\b\n Refer to documentation for more information') +@click.option('-p', '--param_expr', default=None, + help='\b\n Parameter expression, e.g.' + '\b\n 1D scan: "poi_name=<poi_min>_<poi_max>_<step>"' + '\b\n 2D scan: "poi_1_name=<poi_1_min>_<poi_1_max>_<step_1>,' + '\b\n poi_2_name=<poi_2_min>_<poi_2_max>_<step_2>"') +@click.option('--filter', 'filter_expr', default=None, show_default=True, + help='\b\n Filter parameter points by expression.' + '\b\n Example: "mass=2*,350,400,450;klambda=1.*,2.*,-1.*,-2.*"' + '\b\n Refer to documentation for more information') +@click.option('--exclude', 'exclude_expr', default=None, show_default=True, + help='\b\n Exclude parameter points by expression.' + '\b\n Example: "mass=2*,350,400,450;klambda=1.*,2.*,-1.*,-2.*"' + '\b\n Refer to documentation for more information') +@click.option('--cache/--no-cache', default=True, show_default=True, + help='Cache existing result.') +@click.option('-o', '--outname', default='{poi_names}.json', show_default=True, + help='Name of output file.') +@click.option('--outdir', default='likelihood_scan', show_default=True, + help='Output directory.') +@click.option('--cachedir', default='cache', show_default=True, + help='Cache directory relative to the output directory.') +@click.option('--save_log/--skip_log', default=True, show_default=True, + help='Save log file.') +@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True, + help='Name of workspace. Auto-detect by default.') +@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True, + help='Name of model config. Auto-detect by default.') +@click.option('-d', '--data', 'data_name', default='combData', show_default=True, + help='Name of dataset.') +@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True, + help='Name of initial snapshot.') +@click.option('--uncond_snapshot', default=None, show_default=True, + help='Name of snapshot with unconditional fit result.') +@click.option('-r', '--profile', 'profile_param', default="", show_default=True, + help='Parameters to profile.') +@click.option('-f', '--fix', 'fix_param', default="", show_default=True, + help='Parameters to fix.') +@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True, + help='Use constrained NLL (i.e. 
include systematics).')
+@click.option('--allow-nan/--not-allow-nan', default=True, show_default=True,
+              help='Allow cached nll to be nan.')
+@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterion.')
+@click.option('--retry', type=int, default=1, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
+@click.option('--optimize', type=int, default=2, show_default=True,
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
+              help='Activate binned likelihood for RooRealSumPdf.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
+@click.option('--batch_mode/--no-batch', default=False, show_default=True,
+              help='Batch mode when evaluating likelihood.')
+@click.option('--int_bin_precision', type=float, default=-1., show_default=True,
+              help='Integrate the PDF over the bins instead of using the probability '
+                   'density at the bin center.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"')
+@click.option('--parallel', type=int, default=-1, show_default=True,
+              help='\b\n Parallelize job across the N workers.'
+                   '\b\n Case 0: Jobs are run sequentially (for debugging).'
+                   '\b\n Case -1: Jobs are run across N_CPU workers.')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
+def likelihood_scan(**kwargs):
+    """
+    Evaluate a set of parameterised likelihood values
+    """
+    _kwargs = {}
+    for arg_name in ["input_path", "file_expr", "param_expr", "data_name", "outdir", "filter_expr",
+                     "uncond_snapshot", "exclude_expr", "outname", "cache", "cachedir", "save_log",
+                     "parallel", "verbosity", "allow_nan"]:
+        _kwargs[arg_name] = kwargs.pop(arg_name)
+    _kwargs['config'] = kwargs
+    from quickstats.concurrent import ParameterisedLikelihood
+    runner = ParameterisedLikelihood(**_kwargs)
+    runner.run()
+
+@cli.command(name='significance_scan')
+@click.option('-i', '--input_path', required=True,
+              help='Path to the input workspace file or directory containing the parameterised '
+                   'input workspace files.')
+@click.option('-p', '--poi', 'poi_name', default=None,
+              help='Name of the parameter of interest (POI). If None, the first POI is used.')
+@click.option('--mu_exp', type=float, default=0., show_default=True,
+              help='Expected value of the POI under the null hypothesis.')
+@click.option('--asimov_type', type=int, default=None,
+              help='\b\n Evaluate significance on an Asimov dataset of this type. '
+                   'If not specified, the observed data is used. '
+                   '\b\n Choices of asimov types are'
+                   '\b\n 0: fit with POI fixed to 0'
+                   '\b\n 1: fit with POI fixed to 1'
+                   '\b\n 2: fit with POI free and set POI to 1 after fit'
+                   '\b\n 3: fit with POI and constrained NP fixed to 0'
+                   '\b\n 4: fit with POI fixed to 1 and constrained NP fixed to 0'
+                   '\b\n 5: fit with POI free and constrained NP fixed to 0 and set POI to 1 after fit'
+                   '\b\n -1: nominal NP with POI set to 0'
+                   '\b\n -2: nominal NP with POI set to 1')
+@click.option('--file_expr', default=r"[\w-]+", show_default=True,
+              help='\b\n File name expression describing the external parameterisation.'
+                   '\b\n Example: "<mass[F]>_kl_<klambda[P]>"'
+                   '\b\n Regular expression is supported'
+                   '\b\n Refer to documentation for more information')
+@click.option('--param_expr', default=None, show_default=True,
+              help='\b\n Parameter name expression describing the internal parameterisation.'
+                   '\b\n Example: "klambda=-10_10_0.2,k2v=(1,2,3)"'
+                   '\b\n Refer to documentation for more information')
+@click.option('--filter', 'filter_expr', default=None, show_default=True,
+              help='\b\n Filter parameter points by expression.'
+                   '\b\n Example: "mass=(2*,350,400,450);klambda=(1.*,2.*,-1.*,-2.*)"'
+                   '\b\n Refer to documentation for more information')
+@click.option('--exclude', 'exclude_expr', default=None, show_default=True,
+              help='\b\n Exclude parameter points by expression.'
+                   '\b\n Example: "mass=(2*,350,400,450);klambda=(1.*,2.*,-1.*,-2.*)"'
+                   '\b\n Refer to documentation for more information')
+@click.option('--outdir', default='significance', show_default=True,
+              help='Output directory where cached significance files and the merged significance file are saved.')
+@click.option('--cachedir', default='cache', show_default=True,
+              help='Cache directory relative to the output directory.')
+@click.option('--cache/--no-cache', default=True, show_default=True,
+              help='Cache existing result.')
+@click.option('-o', '--outname', default='{param_names}.json', show_default=True,
+              help='Name of the output significance file (all parameter points merged).')
+@click.option('--save_log/--skip_log', default=True, show_default=True,
+              help='Save log file.')
+@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
+              help='Name of workspace. Auto-detect by default.')
+@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
+              help='Name of model config. Auto-detect by default.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of dataset.')
+@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True,
+              help='Name of initial snapshot.')
+@click.option('-r', '--profile', 'profile_param', default="", show_default=True,
+              help='Parameters to profile.')
+@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
+              help='Parameters to fix.')
+@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterion.')
+@click.option('--retry', type=int, default=1, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
+@click.option('--optimize', type=int, default=2, show_default=True,
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
+              help='Activate binned likelihood for RooRealSumPdf.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
+@click.option('--batch_mode/--no-batch', default=False, show_default=True,
+              help='Batch mode when evaluating likelihood.')
+@click.option('--int_bin_precision', type=float, default=-1., show_default=True,
+              help='Integrate the PDF over the bins instead of using the probability '
+                   'density at the bin center.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"')
+@click.option('--parallel', type=int, default=-1, show_default=True,
+              help='\b\n Parallelize job across the N workers.'
+                   '\b\n Case 0: Jobs are run sequentially (for debugging).'
+                   '\b\n Case -1: Jobs are run across N_CPU workers.')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
+def significance_scan(**kwargs):
+    """
+    Evaluate a set of parameterised significance values
+    """
+    _kwargs = {}
+    for arg_name in ["input_path", "poi_name", "data_name", "file_expr", "param_expr",
+                     "filter_expr", "exclude_expr", "mu_exp", "asimov_type",
+                     "snapshot_name", "outdir", "cachedir", "outname", "cache",
+                     "save_log", "parallel", "verbosity"]:
+        _kwargs[arg_name] = kwargs.pop(arg_name)
+    _kwargs['config'] = kwargs
+    from quickstats.concurrent import ParameterisedSignificance
+    runner = ParameterisedSignificance(**_kwargs)
+    runner.run()
\ No newline at end of file
diff --git a/quickstats/clis/limit_setting.py b/quickstats/clis/limit_setting.py
index c5058df09765adca8c3115dc7314246f78ed7eca..7468606f08b3f35e0540fb43dd9c8cba1ddcb1bb 100644
--- a/quickstats/clis/limit_setting.py
+++ b/quickstats/clis/limit_setting.py
@@ -1,7 +1,103 @@
 import os
 import click
+
+from .core import cli
+
+__all__ = ['cls_limit', 'limit_scan']
+
+@cli.command(name='cls_limit')
+@click.option('-i', '--input_file', 'filename', required=True,
+              help='Path to the input workspace file.')
+@click.option('-p', '--poi', 'poi_name', default=None, show_default=True,
+              help='Name of the parameter of interest (POI). If not specified, the first POI from the workspace is used.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of dataset.')
+@click.option('--asimov_data_name', 'asimov_data_name', default=None,
+              help='If given, use custom background asimov dataset instead of generating on the fly.')
+@click.option('-o', '--outname', default='limits.json', show_default=True,
+              help='Name of output limit file.')
+@click.option('--mu_exp', type=float, default=0, show_default=True,
+              help='(DO NOT USE) Expected signal strength value to be used for Asimov generation.')
+@click.option('--blind/--unblind', 'do_blind', default=True, show_default=True,
+              help='Blind/unblind analysis.')
+@click.option('--CL', 'CL', type=float, default=0.95, show_default=True,
+              help='CL value to use.')
+@click.option('--precision', default=0.005, show_default=True,
+              help='Precision in mu that defines iterative cutoff.')
+@click.option('--adjust_fit_range/--keep_fit_range', default=True, show_default=True,
+              help='Whether to adjust the fit range to median limit +- 5 sigma for observed fit.')
+@click.option('--do_tilde/--no_tilde', default=True, show_default=True,
+              help='Bound mu at zero if true and do the \\tilde{q}_{mu} asymptotics.')
+@click.option('--predictive_fit/--no_predictive_fit', default=False, show_default=True,
+              help='Extrapolate best fit nuisance parameters based on previous fit results.')
+@click.option('--do_better_bands/--skip_better_bands', default=True, show_default=True,
+              help='Evaluate asymptotic CLs limit for various sigma bands.')
+@click.option('--better_negative_bands/--skip_better_negative_bands', default=False, show_default=True,
+              help='Evaluate asymptotic CLs limit for negative sigma bands.')
+@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
+              help='Activate binned likelihood for RooRealSumPdf.')
+@click.option('--save_summary/--skip_summary', default=True, show_default=True,
+              help='Save summary information.')
+@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
+              help='Parameters to fix.')
+@click.option('-r', '--profile', 'profile_param', default="", show_default=True,
+              help='Parameters to profile.')
+@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
+              help='Name of workspace. Auto-detect by default.')
+@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
+              help='Name of model config. Auto-detect by default.')
+@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True,
+              help='Name of initial snapshot.')
+@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True,
+              help='Use constrained NLL.')
+@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterion.')
+@click.option('--retry', type=int, default=2, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
+@click.option('--optimize', type=int, default=2, show_default=True,
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
+              help='Activate binned likelihood for RooRealSumPdf.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
+@click.option('--batch_mode/--no-batch', default=False, show_default=True,
+              help='Batch mode when evaluating likelihood.')
+@click.option('--int_bin_precision', type=float, default=-1., show_default=True,
+              help='Integrate the PDF over the bins instead of using the probability '
+                   'density at the bin center.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. 
Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') +def cls_limit(**kwargs): + """ + Tool for evaluating Asymptotic CLs limit + """ + from quickstats.components import AsymptoticCLs + outname = kwargs.pop('outname') + save_summary = kwargs.pop('save_summary') + asymptotic_cls = AsymptoticCLs(**kwargs) + asymptotic_cls.evaluate_limits() + asymptotic_cls.save(outname, summary=save_summary) -@click.command(name='limit_scan') +@cli.command(name='limit_scan') @click.option('-i', '--input_path', 'input_path', required=True, help='Input directory/path containing the workspace file(s) to process.') @click.option('--file_expr', default=r"[\w-]+", show_default=True, @@ -11,113 +107,110 @@ import click '\b\n Refer to documentation for more information') @click.option('--param_expr', default=None, show_default=True, help='\b\n Parameter name expression describing the internal parameterisation.' - '\b\n\n Example: "klambda=-10_10_0.2,k2v=1"' + '\b\n Example: "klambda=-10_10_0.2,k2v=(1,2,3)"' '\b\n Refer to documentation for more information') @click.option('--filter', 'filter_expr', default=None, show_default=True, - help='\b Filter parameter points by expression.\n' - '\b Example: "mass=2*,350,400,450;klambda=1.*,2.*,-1.*,-2.*"\n' - '\b Refer to documentation for more information\n') + help='\b\n Filter parameter points by expression.' + '\b\n Example: "mass=(2*,350,400,450);klambda=(1.*,2.*,-1.*,-2.*)"' + '\b\n Refer to documentation for more information') @click.option('--exclude', 'exclude_expr', default=None, show_default=True, - help='\b Exclude parameter points by expression.\n' - '\b Example: "mass=2*,350,400,450;klambda=1.*,2.*,-1.*,-2.*"\n' - '\b Refer to documentation for more information\n') + help='\b\n Exclude parameter points by expression.' + '\b\n Example: "mass=(2*,350,400,450);klambda=(1.*,2.*,-1.*,-2.*)"' + '\b\n Refer to documentation for more information') @click.option('--outdir', default='output', show_default=True, - help='Output directory where cached limit files and merged limit file are saved.') + help='Output directory where cached limit files and the merged limit file are saved.') @click.option('--cachedir', default='cache', show_default=True, help='Cache directory relative to the output directory.') @click.option('-o', '--outname', default='limits.json', show_default=True, - help='Name of output limit file (all parameter points merged).') + help='Name of the output limit file (all parameter points merged).') @click.option('--cache/--no-cache', default=True, show_default=True, - help='Cache output of individual parameter point') + help='Cache output of individual parameter point.') @click.option('--save-log/--no-log', default=True, show_default=True, help='Save a log file for each parameter point.') @click.option('--save-summary/--no-summary', default=True, show_default=False, help='Save a summary file for each parameter point.') -@click.option('--parallel', type=int, default=-1, show_default=True, - help='\b\n Parallelize job across the N workers.' - '\b\n Case 0: Jobs are run sequentially (for debugging).' - '\b\n Case -1: Jobs are run across N_CPU workers.') @click.option('-p', '--poi', 'poi_name', default=None, help='Name of the parameter of interest (POI). 
If None, the first POI is used.')
 @click.option('--mu_exp', type=float, default=0, show_default=True,
-              help='Expected signal strengh value to be used for Asimov generation')
+              help='(DO NOT USE) Expected signal strength value to be used for Asimov generation.')
 @click.option('-d', '--data', 'data_name', default='combData', show_default=True,
-              help='Name of dataset')
+              help='Name of dataset.')
 @click.option('--asimov_data_name', 'asimov_data_name', default=None,
               help='If given, use custom background asimov dataset instead of generating on the fly.')
 @click.option('--blind/--unblind', 'do_blind', default=True, show_default=True,
-              help='Blind/unblind analysis')
+              help='Blind/unblind analysis.')
 @click.option('--CL', 'CL', type=float, default=0.95, show_default=True,
-              help='CL value to use')
+              help='CL value to use.')
 @click.option('--precision', default=0.005, show_default=True,
-              help='precision in mu that defines iterative cutoff')
+              help='Precision in mu that defines iterative cutoff.')
 @click.option('--adjust_fit_range/--keep_fit_range', default=True, show_default=True,
-              help='whether to adjust the fit range to median limit +- 5 sigma for observed fit')
+              help='Whether to adjust the fit range to median limit +- 5 sigma for observed fit.')
 @click.option('--do_tilde/--no_tilde', default=True, show_default=True,
-              help='bound mu at zero if true and do the \tilde{q}_{mu} asymptotics')
+              help='Bound mu at zero if true and do the \\tilde{q}_{mu} asymptotics.')
 @click.option('--predictive_fit/--no_predictive_fit', default=False, show_default=True,
-              help='extrapolate best fit nuisance parameters based on previous fit results')
+              help='Extrapolate best fit nuisance parameters based on previous fit results.')
 @click.option('--do_better_bands/--skip_better_bands', default=True, show_default=True,
-              help='evaluate asymptotic CLs limit for various sigma bands')
+              help='Evaluate asymptotic CLs limit for various sigma bands.')
 @click.option('--better_negative_bands/--skip_better_negative_bands', default=False, show_default=True,
-              help='evaluate asymptotic CLs limit for negative sigma bands')
+              help='Evaluate asymptotic CLs limit for negative sigma bands.')
 @click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
-              help='Binned likelihood')
+              help='Activate binned likelihood for RooRealSumPdf.')
 @click.option('--save_log/--skip_log', default=True, show_default=True,
-              help='Save log file')
+              help='Save log file.')
 @click.option('--save_summary/--skip_summary', default=True, show_default=True,
-              help='Save summary information')
+              help='Save summary information.')
 @click.option('-f', '--fix', 'fix_param', default="", show_default=True,
-              help='Parameters to fix')
+              help='Parameters to fix.')
 @click.option('-r', '--profile', 'profile_param', default="", show_default=True,
-              help='Parameters to profile')
+              help='Parameters to profile.')
 @click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
               help='Name of workspace. Auto-detect by default.')
 @click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
               help='Name of model config. Auto-detect by default.')
 @click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True,
-              help='Name of initial snapshot')
+              help='Name of initial snapshot.')
+@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True,
+              help='Use constrained NLL.')
 @click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
-              help='Minimizer type')
+              help='Minimizer type.')
 @click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
-              help='Minimizer algorithm')
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
 @click.option('-e', '--eps', type=float, default=1.0, show_default=True,
-              help='Convergence criterium')
+              help='Minimization convergence criterion.')
 @click.option('--retry', type=int, default=2, show_default=True,
-              help='Maximum number of retries upon a failed fit')
-@click.option('--strategy', type=int, default=1, show_default=True,
-              help='Default minimization strategy')
-@click.option('--print_level', type=int, default=-1, show_default=True,
-              help='Minimizer print level')
-@click.option('--timer/--no_timer', default=False, show_default=True,
-              help='Enable minimizer timer')
-@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
-              help='Number of CPUs to use per parameter')
-@click.option('--offset/--no-offset', default=True, show_default=True,
-              help='Offset likelihood')
+              help='Maximum number of retries upon a failed fit.')
 @click.option('--optimize', type=int, default=2, show_default=True,
-              help='Optimize constant terms')
-@click.option('--improve', type=int, default=0, show_default=True,
-              help='Execute improve after each minimization')
+              help='Optimize constant terms.')
 @click.option('--minimizer_offset', type=int, default=1, show_default=True,
-              help='Enable minimizer offsetting')
-@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True,
-              help='Fix StarMomentMorph cache')
-@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True,
-              help='Fix MultiPdf level 2')
-@click.option('--max_calls', type=int, default=-1, show_default=True,
-              help='Maximum number of function calls')
-@click.option('--max_iters', type=int, default=-1, show_default=True,
-              help='Maximum number of Minuit iterations')
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('--binned/--unbinned', 'binned_likelihood', default=True, show_default=True,
+              help='Activate binned likelihood for RooRealSumPdf.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
 @click.option('--batch_mode/--no-batch', default=False, show_default=True,
-              help='Batch mode when evaluating likelihood')
+              help='Batch mode when evaluating likelihood.')
 @click.option('--int_bin_precision', type=float, default=-1., show_default=True,
               help='Integrate the PDF over the bins instead of using the probability '
-                   'density at the bin centre')
-@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True,
-              help='Use constrained NLL')
-@click.option('-v', '--verbosity', default="INFO", show_default=True,
-              help='verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")')
+                   'density at the bin center.')
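All of the new command modules in this patch follow the same registration pattern: import the shared group with `from .core import cli` and decorate each command with `@cli.command(name=...)`, so that merely importing a tool module attaches its commands to the top-level group. Below is a minimal self-contained sketch of that pattern; the bare `cli` group and the `my_tool` command are hypothetical stand-ins for illustration, not the actual contents of quickstats/clis/core.py:

import click

@click.group()
def cli():
    """Shared entry point; tool modules attach their commands to this group."""

@cli.command(name='my_tool')
@click.option('-i', '--input_file', required=True,
              help='Path to the input workspace file.')
def my_tool(input_file):
    """Hypothetical command illustrating the @cli.command registration pattern."""
    # click passes the option value as a keyword argument named after the option
    click.echo(f'processing {input_file}')

if __name__ == '__main__':
    cli()

Under this assumption, invoking the script as `my_tool -i ws.root` dispatches through the group in the same way the quickstats commands defined in this patch do.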
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"')
+@click.option('--parallel', type=int, default=-1, show_default=True,
+              help='\b\n Parallelize job across the N workers.'
+                   '\b\n Case 0: Jobs are run sequentially (for debugging).'
+                   '\b\n Case -1: Jobs are run across N_CPU workers.')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
 def limit_scan(**kwargs):
     """
     Evaluate a set of parameterised asymptotic cls limits
diff --git a/quickstats/clis/nuisance_parameter_tools.py b/quickstats/clis/nuisance_parameter_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..a85496cf68204ebd2604d4708e97eab0c7a9d50b
--- /dev/null
+++ b/quickstats/clis/nuisance_parameter_tools.py
@@ -0,0 +1,320 @@
+import os
+import json
+import click
+
+from .core import cli
+
+__all__ = ['harmonize_np', 'run_pulls', 'plot_pulls', 'np_correlation']
+
+@cli.command(name='harmonize_np')
+@click.argument('ws_files', nargs=-1)
+@click.option('-r', '--reference', required=True, help='Path to reference json file containing renaming scheme.')
+@click.option('-i', '--input_config_path', default=None, show_default=True,
+              help='Path to json file containing input workspace paths.')
+@click.option('-b', '--base_path', default='./', show_default=True,
+              help='Base path for input config.')
+@click.option('-o', '--outfile', default='renamed_np.json', show_default=True,
+              help='Output filename.')
+def harmonize_np(ws_files, reference, input_config_path, base_path, outfile):
+    """
+    Harmonize NP names across different workspaces
+    """
+    from quickstats.components import NuisanceParameterHarmonizer
+    harmonizer = NuisanceParameterHarmonizer(reference)
+    if (len(ws_files) > 0) and input_config_path is not None:
+        raise RuntimeError('either workspace paths or json file containing workspace paths should be given')
+    if len(ws_files) > 0:
+        harmonizer.harmonize(ws_files, outfile=outfile)
+    elif (input_config_path is not None):
+        harmonizer.harmonize_multi_input(input_config_path, base_path, outfile=outfile)
+
+@cli.command(name='run_pulls')
+@click.option('-i', '--input_file', 'filename', required=True,
+              help='Path to the input workspace file.')
+@click.option('-x', '--poi', 'poi_name', default=None,
+              help='POI to measure NP impact on.')
+@click.option('-o', '--outdir', default="pulls", show_default=True,
+              help='Output directory.')
+@click.option('-w', '--workspace', 'ws_name', default=None,
+              help='Name of workspace. Auto-detect by default.')
+@click.option('-m', '--model_config', 'mc_name', default=None,
+              help='Name of model config. Auto-detect by default.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of dataset.')
+@click.option('--filter', 'filter_expr', default=None, show_default=True,
+              help='Filter nuisance parameter(s) to run pulls and impacts on. '+\
+                   'Multiple parameters are separated by commas. '+\
+                   'Wildcards are accepted. All NPs are included by default.')
+@click.option('-r', '--profile', 'profile_param', default=None, show_default=True,
+              help='Parameters to profile.')
+@click.option('-f', '--fix', 'fix_param', default=None, show_default=True,
+              help='Parameters to fix.')
+@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True,
+              help='Name of initial snapshot.')
+@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterion.')
+@click.option('-q', '--precision', type=float, default=0.001, show_default=True,
+              help='Precision of sigma scan.')
+@click.option('--retry', type=int, default=1, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
+@click.option('--optimize', type=int, default=2, show_default=True,
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('--batch_mode/--no-batch', default=False, show_default=True,
+              help='Batch mode when evaluating likelihood.')
+@click.option('--int_bin_precision', type=float, default=-1., show_default=True,
+              help='Integrate the PDF over the bins instead of using the probability '
+                   'density at the bin center.')
+@click.option('--parallel', type=int, default=-1, show_default=True,
+              help='\b\n Parallelize job across the N workers.'
+                   '\b\n Case 0: Jobs are run sequentially (for debugging).'
+                   '\b\n Case -1: Jobs are run across N_CPU workers.')
+@click.option('--cache/--no-cache', default=True, show_default=True,
+              help='Cache existing result.')
+@click.option('--exclude', 'exclude_expr', default=None, show_default=True,
+              help='Exclude NPs to run pulls and impacts on. '+\
+                   'Multiple parameters are separated by commas. '+\
+                   'Wildcards are accepted.')
+@click.option('--save_log/--skip_log', default=True, show_default=True,
+              help='Save log file.')
+@click.option('--constrained_only/--any_nuis', default=True, show_default=True,
+              help='Whether to include constrained nuisance parameters only.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. 
Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"') +@click.option('--version', type=click.Choice(['1', '2']), default='2', show_default=True, + help='Version of tool to use (Choose between 1 and 2).') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') +def run_pulls(**kwargs): + """ + Tool for computing NP pulls and impacts + """ + version = kwargs.pop('version') + if version == '1': + from quickstats.components import NuisanceParameterPull + NuisanceParameterPull().run_pulls(**kwargs) + elif version == '2': + from quickstats.concurrent import NuisanceParameterRankingRunner + init_kwargs = {} + for key in ['filename', 'filter_expr', 'exclude_expr', 'poi_name', + 'data_name', 'cache', 'outdir', 'constrained_only', + 'save_log', 'parallel', 'verbosity']: + init_kwargs[key] = kwargs.pop(key) + init_kwargs['config'] = kwargs + runner = NuisanceParameterRankingRunner(**init_kwargs) + runner.run() + +@cli.command(name='plot_pulls') +@click.option('-i', '--inputdir', required=True, help='Path to directory containing pull results') +@click.option('-p', '--poi', default=None, help='Parameter of interest for plotting impact') +@click.option('-n', '--n_rank', type=int, default=None, help='Total number of NP to rank') +@click.option('-m', '--rank_per_plot', type=int, default=20, show_default=True, + help='Number of NP to show in a single plot.') +@click.option('--ranking/--no_ranking', default=True, show_default=True, + help='Rank NP by impact.') +@click.option('--threshold', type=float, default=0., show_default=True, + help='Filter NP by postfit impact threshold.') +@click.option('--show_sigma/--hide_sigma', default=True, show_default=True, + help='Show one standard deviation pull.') +@click.option('--show_prefit/--hide_prefit', default=True, show_default=True, + help='Show prefit impact.') +@click.option('--show_postfit/--hide_postfit', default=True, show_default=True, + help='Show postfit impact.') +@click.option('--sigma_bands/--no_sigma_bands', default=False, show_default=True, + help='Draw +-1, +-2 sigma bands.') +@click.option('--sigma_lines/--no_sigma_lines', default=True, show_default=True, + help='Draw +-1 sigma lines.') +@click.option('--ranking_label/--no_ranking_label', default=True, show_default=True, + help='Show ranking labels.') +@click.option('--shade/--no_shade', default=True, show_default=True, + help='Draw shade.') +@click.option('--correlation/--no_correlation', default=True, show_default=True, + help='Show correlation impact.') +@click.option('--onesided/--overlap', default=True, show_default=True, + help='Show onesided impact.') +@click.option('--relative/--absolute', default=False, show_default=True, + help='Show relative variation.') +@click.option('--theta_max', type=float, default=2, show_default=True, + help='Pull range.') +@click.option('-y', '--padding', type=int, default=7, show_default=True, + help='Padding below plot for texts and legends. 
NP column height is 1 unit.')
+@click.option('-h', '--height', type=float, default=1.0, show_default=True,
+              help='NP column height.')
+@click.option('-s', '--spacing', type=float, default=0., show_default=True,
+              help='Spacing between impact box.')
+@click.option('--label-fontsize', type=float, default=20., show_default=True,
+              help='Fontsize of analysis label text.')
+@click.option('-d', '--display_poi', default=r"$\mu$", show_default=True,
+              help='POI name to be shown in the plot.')
+@click.option('-t', '--extra_text', default=None, help='Extra texts below the ATLAS label. '+\
+              'Use "//" as newline delimiter.')
+@click.option('--elumi_label/--no_elumi_label', default=True, show_default=True,
+              help='Show energy and luminosity labels.')
+@click.option('--ranking_label/--no_ranking_label', default=True, show_default=True,
+              help='Show ranking label.')
+@click.option('--energy', default="13 TeV", show_default=True,
+              help='Beam energy.')
+@click.option('--lumi', default="140 fb$^{-1}$", show_default=True,
+              help='Luminosity.')
+@click.option('--status', default="int", show_default=True,
+              help='\b\n Analysis status. Choose from'
+                   '\b\n int : Internal'
+                   '\b\n wip : Work in Progress'
+                   '\b\n prelim : Preliminary'
+                   '\b\n final : *no status label*'
+                   '\b\n *custom input* : *custom input*')
+@click.option('--combine_pdf/--split_pdf', default=True, show_default=True,
+              help='Combine all ranking plots into a single pdf.')
+@click.option('--outdir', default='ranking_plots', show_default=True,
+              help='Output directory.')
+@click.option('-o', '--outname', default='ranking', show_default=True,
+              help='Output file name prefix.')
+@click.option('--style', default='default', show_default=True,
+              help='Plotting style. Built-in styles are "default" and "trex". '+\
+                   'Specify path to yaml file to set custom plotting style.')
+@click.option('--fix_axis_scale/--free_axis_scale', default=True, show_default=True,
+              help='Fix the axis scale across all ranking plots.')
+@click.option('--version', type=click.Choice(['1', '2']), default='2', show_default=True,
+              help='Version of tool to use (Choose between 1 and 2).')
+def plot_pulls(**kwargs):
+    """
+    Tool for plotting NP pulls and impact rankings
+    """
+    from quickstats.plots.np_ranking_plot import NPRankingPlot
+    inputdir, poi = kwargs.pop('inputdir'), kwargs.pop('poi')
+    version = kwargs.pop('version')
+    ranking_plot = NPRankingPlot(inputdir, poi, version=version)
+    ranking_plot.plot(**kwargs)
+
+@cli.command(name='np_correlation')
+@click.option('-i', '--input_file', "filename", required=True,
+              help='Path to the input workspace file.')
+@click.option('-o', '--basename', default='NP_correlation_matrix', show_default=True,
+              help='Base name of the output.')
+@click.option('--select', default=None, show_default=True,
+              help='Select specific NPs to be stored in the final output (for json and plot only). '
+                   'Use comma to separate the selection (wild card is supported).')
+@click.option('--remove', default=None, show_default=True,
+              help='Select specific NPs to be removed in the final output (for json and plot only). '
+                   'Use comma to separate the selection (wild card is supported).')
+@click.option('--save_plot/--no_save_plot', default=True, show_default=True,
+              help='Save NP correlation matrix as a plot in pdf format.')
+@click.option('--save_json/--no_save_json', default=False, show_default=True,
+              help='Save NP correlation matrix as a json file.')
+@click.option('--save_root/--no_save_root', default=False, show_default=True,
+              help='Save NP correlation matrix as a 2D histogram in a root file.')
+@click.option('--plot_style', default="default", show_default=True,
+              help='Plot style if save_plot is enabled. Choose between "default" and '
+                   '"viridis". Alternatively, a path to a yaml config file can be used.')
+@click.option('-w', '--workspace', 'ws_name', default=None, show_default=True,
+              help='Name of workspace. Auto-detect by default.')
+@click.option('-m', '--model_config', 'mc_name', default=None, show_default=True,
+              help='Name of model config. Auto-detect by default.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of dataset.')
+@click.option('-s', '--snapshot', 'snapshot_name', default=None, show_default=True,
+              help='Name of initial snapshot.')
+@click.option('-r', '--profile', 'profile_param', default="", show_default=True,
+              help='Parameters to profile.')
+@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
+              help='Parameters to fix.')
+@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True,
+              help='Use constrained NLL (i.e. include systematics).')
+@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('-a', '--minimizer_algo', default="Migrad", show_default=True,
+              help='Minimizer algorithm.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterion.')
+@click.option('--retry', type=int, default=1, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
+@click.option('--optimize', type=int, default=2, show_default=True,
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Offset likelihood.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
+@click.option('--batch_mode/--no-batch', default=False, show_default=True,
+              help='Batch mode when evaluating likelihood.')
+@click.option('--int_bin_precision', type=float, default=-1., show_default=True,
+              help='Integrate the PDF over the bins instead of using the probability '
+                   'density at the bin center.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. 
Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') +def np_correlation(**kwargs): + """ + Evaluate post-fit NP correlation matrix + """ + _kwargs = {} + for arg_name in ["basename", "save_plot", "save_json", "save_root", + "plot_style", "select", "remove"]: + _kwargs[arg_name] = kwargs.pop(arg_name) + _init_kwargs = {} + for arg_name in ["filename", "data_name", "verbosity"]: + _init_kwargs[arg_name] = kwargs.pop(arg_name) + _init_kwargs['config'] = kwargs + _init_kwargs['poi_name'] = [] + from quickstats.components import AnalysisBase + analysis = AnalysisBase(**_init_kwargs) + analysis.minimizer.configure(hesse=True) + analysis.nll_fit(mode=3) + fit_result = analysis.roofit_result + basename = os.path.splitext(_kwargs['basename'])[0] + from quickstats.utils.roofit_utils import get_correlation_matrix + if _kwargs['save_root']: + correlation_hist = get_correlation_matrix(fit_result, lib="root") + outname = basename + ".root" + correlation_hist.SaveAs(outname) + print(f"INFO: Saved correlation histogram to `{outname}`") + correlation_hist.Delete() + from quickstats.utils.common_utils import filter_by_wildcards + if _kwargs['save_json'] or _kwargs['save_plot']: + df = get_correlation_matrix(fit_result, lib='pandas') + labels = list(df.columns) + selected = filter_by_wildcards(labels, _kwargs['select']) + selected = filter_by_wildcards(selected, _kwargs['remove'], exclusion=True) + to_drop = list(set(labels) - set(selected)) + df = df.drop(to_drop, axis=0).drop(to_drop, axis=1).transpose() + if _kwargs['save_json']: + data = df.to_dict() + outname = basename + ".json" + with open(outname, "w") as out: + json.dump(data, out, indent=2) + print(f"INFO: Saved correlation data to `{outname}`") + if _kwargs['save_plot']: + import matplotlib.pyplot as plt + from quickstats.plots import CorrelationPlot + plotter = CorrelationPlot(df) + ax = plotter.draw_style(_kwargs['plot_style']) + outname = basename + ".pdf" + plt.savefig(outname, bbox_inches="tight") + print(f"INFO: Saved correlation plot to `{outname}`") \ No newline at end of file diff --git a/quickstats/clis/processor_cli.py b/quickstats/clis/processor_tools.py similarity index 77% rename from quickstats/clis/processor_cli.py rename to quickstats/clis/processor_tools.py index c4994cd65fc6c6edc4626a4008783681c420de0a..97870a3b031127e4f45b1be0010732bdff381fa7 100644 --- a/quickstats/clis/processor_cli.py +++ b/quickstats/clis/processor_tools.py @@ -1,7 +1,11 @@ import os import click -@click.command(name='process_rfile') +from .core import cli + +__all__ = ['process_rfile'] + +@cli.command(name='process_rfile') @click.option('-i', '--input_file', 'filename', required=True, help='Input ROOT file to process.') @click.option('-c', '--config', 'config_path', required=True, @@ -9,14 +13,15 @@ import click @click.option('--multithread/--no-multirhread', default=True, show_default=True, help='Enable implicit multi-threading.') @click.option('-g', '--global', 'glob', default=None, - help='Include global variables in the form "<name>=<value>,..."') + help='Include global variables in the form "<name>=<value>,..." 
diff --git a/quickstats/clis/processor_cli.py b/quickstats/clis/processor_tools.py
similarity index 77%
rename from quickstats/clis/processor_cli.py
rename to quickstats/clis/processor_tools.py
index c4994cd65fc6c6edc4626a4008783681c420de0a..97870a3b031127e4f45b1be0010732bdff381fa7 100644
--- a/quickstats/clis/processor_cli.py
+++ b/quickstats/clis/processor_tools.py
@@ -1,7 +1,11 @@
 import os
 import click
 
-@click.command(name='process_rfile')
+from .core import cli
+
+__all__ = ['process_rfile']
+
+@cli.command(name='process_rfile')
 @click.option('-i', '--input_file', 'filename', required=True,
               help='Input ROOT file to process.')
 @click.option('-c', '--config', 'config_path', required=True,
@@ -9,14 +13,15 @@ import click
 @click.option('--multithread/--no-multirhread', default=True, show_default=True,
               help='Enable implicit multi-threading.')
 @click.option('-g', '--global', 'glob', default=None,
-              help='Include global variables in the form "<name>=<value>,..."')
+              help='Include global variables in the form "<name>=<value>,...".')
 @click.option('-f', '--flag', default=None,
               help='Flags to set (separated by commas).')
-@click.option('-v', '--verbosity', default="INFO", show_default=True,
-              help='verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
 def process_rfile(filename, config_path, multithread, glob, flag, verbosity):
     """
-    Process a ROOT file based on RDataFrame routines.
+    Process a ROOT file based on RDataFrame routines
     """
     from quickstats.components.processors import RooProcessor
     from quickstats.components.processors.actions import RooProcGlobalVariables
diff --git a/quickstats/clis/stat_tools.py b/quickstats/clis/stat_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef8b833da8b0e3ffe382353546663dfd798b0b10
--- /dev/null
+++ b/quickstats/clis/stat_tools.py
@@ -0,0 +1,201 @@
+import os
+import json
+import click
+
+from .core import cli
+
+__all__ = ['generate_standard_asimov', 'toy_significance', 'toy_limit']
+
+@cli.command(name='generate_standard_asimov')
+@click.option('-i', '--input_file', 'filename', required=True,
+              help='Path to the input workspace file.')
+@click.option('-o', '--output_file', 'outname', required=True,
+              help='Name of the output workspace containing the '
+                   'generated asimov dataset.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of the dataset used in NP profiling.')
+@click.option('-p', '--poi', 'poi_name', required=True,
+              help='Name of the parameter of interest (POI). Multiple POIs are separated by commas.')
+@click.option('-s', '--poi_scale', type=float, default=1.0, show_default=True,
+              help='Scale factor applied to the POI value.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterium.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
+              help='Parameters to fix.')
+@click.option('-r', '--profile', 'profile_param', default="", show_default=True,
+              help='Parameters to profile.')
+@click.option('--snapshot', 'snapshot_name', default=None, show_default=True,
+              help='Name of initial snapshot.')
+@click.option('--rebuild/--do-not-rebuild', default=False, show_default=True,
+              help='Rebuild the workspace.')
+@click.option('--asimov_names', default=None, show_default=True,
+              help='Names of the output asimov datasets (separated by commas). If not specified, '
+                   'a default name for the corresponding asimov type will be given.')
+@click.option('--asimov_snapshots', default=None, show_default=True,
+              help='Names of the output asimov snapshots (separated by commas). If not specified, '
+                   'a default name for the corresponding asimov type will be given.')
+@click.option('-t', '--asimov_types', default="0,1,2", show_default=True,
+              help='\b\n Types of asimov dataset to generate separated by commas.'
+                   '\b\n  0: fit with POI fixed to 0'
+                   '\b\n  1: fit with POI fixed to 1'
+                   '\b\n  2: fit with POI free and set POI to 1 after fit'
+                   '\b\n  3: fit with POI and constrained NP fixed to 0'
+                   '\b\n  4: fit with POI fixed to 1 and constrained NP fixed to 0'
+                   '\b\n  5: fit with POI free and constrained NP fixed to 0 and set POI to 1 after fit'
+                   '\b\n -1: nominal NP with POI set to 0'
+                   '\b\n -2: nominal NP with POI set to 1')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
+def generate_standard_asimov(**kwargs):
+    """
+    Generate standard Asimov dataset
+    """
+    from quickstats.components import AsimovGenerator
+    from quickstats.utils.string_utils import split_str
+    outname = kwargs.pop('outname')
+    asimov_types = kwargs.pop('asimov_types')
+    try:
+        asimov_types = split_str(asimov_types, sep=",", cast=int)
+    except Exception:
+        asimov_types = split_str(asimov_types, sep=",")
+    fix_param = kwargs.pop('fix_param')
+    profile_param = kwargs.pop('profile_param')
+    snapshot_name = kwargs.pop('snapshot_name')
+    poi_scale = kwargs.pop("poi_scale")
+    asimov_names = kwargs.pop("asimov_names")
+    asimov_snapshots = kwargs.pop("asimov_snapshots")
+    verbosity = kwargs.pop("verbosity")
+    rebuild = kwargs.pop("rebuild")
+    kwargs['poi_name'] = split_str(kwargs.pop('poi_name'), sep=",")
+    config = {
+        'fix_param': fix_param,
+        'profile_param': profile_param,
+        'snapshot_name': snapshot_name
+    }
+    if asimov_names is not None:
+        asimov_names = split_str(asimov_names, sep=",")
+    if asimov_snapshots is not None:
+        asimov_snapshots = split_str(asimov_snapshots, sep=",")
+    generator = AsimovGenerator(**kwargs, config=config, verbosity=verbosity)
+    generator.generate_standard_asimov(asimov_types, poi_scale=poi_scale,
+                                       asimov_names=asimov_names,
+                                       asimov_snapshots=asimov_snapshots)
+    generator.save(outname, rebuild=rebuild)
+
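A minimal usage sketch for generate_standard_asimov; the file and POI names are hypothetical, and the call sequence mirrors the command body above:

# Hypothetical CLI invocation:
#   quickstats generate_standard_asimov -i ws.root -o ws_asimov.root -p mu -t 0,1,2
# Programmatic sketch following the command body:
from quickstats.components import AsimovGenerator

config = {"fix_param": "", "profile_param": "", "snapshot_name": None}
generator = AsimovGenerator(filename="ws.root", data_name="combData",
                            poi_name=["mu"], config=config, verbosity="INFO")
generator.generate_standard_asimov([0, 1, 2], poi_scale=1.0)  # asimov types 0, 1, 2 as documented above
generator.save("ws_asimov.root", rebuild=False)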
+@cli.command(name='toy_significance')
+@click.option('-i', '--input_file', 'filename', required=True,
+              help='Path to the input workspace file.')
+@click.option('-o', '--output_file', 'outname', default="toy_study/results.json",
+              help='Name of the output file containing toy results.')
+@click.option('-n', '--n_toys', type=int,
+              help='Number of the toys to use.')
+@click.option('-b', '--batchsize', type=int, default=100, show_default=True,
+              help='Divide the task into batches each containing this number of toys. '
+                   'Result from each batch is saved for caching and different batches '
+                   'are run in parallel if needed.')
+@click.option('-s', '--seed', type=int, default=0, show_default=True,
+              help='Random seed used for generating toy datasets.')
+@click.option('-p', '--poi', 'poi_name', default=None,
+              help='Name of the parameter of interest (POI). If None, the first POI is used.')
+@click.option('--poi_val', type=float, default=0, show_default=True,
+              help='POI value when generating the toy dataset.')
+@click.option('--binned/--unbinned', default=True, show_default=True,
+              help='Generate binned toy dataset.')
+@click.option('--cache/--no-cache', default=True, show_default=True,
+              help='Cache existing batch results.')
+@click.option('--fit_options', default=None, help='A json file specifying the fit options.')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
+@click.option('--parallel', type=int, default=-1, show_default=True,
+              help='\b\n Parallelize job across the N workers.'
+                   '\b\n Case  0: Jobs are run sequentially (for debugging).'
+                   '\b\n Case -1: Jobs are run across N_CPU workers.')
+def toy_significance(**kwargs):
+    """
+    Generate toys and evaluate significance
+    """
+    from quickstats.components import PValueToys
+    n_toys = kwargs.pop("n_toys")
+    batchsize = kwargs.pop("batchsize")
+    seed = kwargs.pop("seed")
+    cache = kwargs.pop("cache")
+    outname = kwargs.pop("outname")
+    parallel = kwargs.pop("parallel")
+    pvalue_toys = PValueToys(**kwargs)
+    pvalue_toys.get_toy_results(n_toys=n_toys, batchsize=batchsize, seed=seed,
+                                cache=cache, save_as=outname, parallel=parallel)
+
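Similarly for toy_significance, a usage sketch with hypothetical names, following the same call sequence as the command body above:

# Hypothetical CLI invocation:
#   quickstats toy_significance -i ws.root -n 1000 -b 100 --poi_val 0
# Programmatic sketch following the command body:
from quickstats.components import PValueToys

pvalue_toys = PValueToys(filename="ws.root", poi_name=None, poi_val=0,
                         binned=True, fit_options=None, verbosity="INFO")
pvalue_toys.get_toy_results(n_toys=1000, batchsize=100, seed=0,
                            cache=True, save_as="toy_study/results.json", parallel=-1)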
+@cli.command(name='toy_limit')
+@click.option('-i', '--input_file', 'filename', required=True,
+              help='Path to the input workspace file.')
+@click.option('-d', '--data', 'data_name', default='combData', show_default=True,
+              help='Name of the dataset used for computing observed limit.')
+@click.option('-o', '--output_file', 'outname',
+              default="toy_study/toy_result_seed_{seed}_batch_{batch}.root",
+              show_default=True,
+              help='Name of the output file containing toy results.')
+@click.option('--poi_max', type=float, default=None,
+              help='Maximum range of POI.')
+@click.option('--poi_min', type=float, default=None,
+              help='Minimum range of POI.')
+@click.option('--scan_max', type=float, default=None,
+              help='Maximum scan value of POI.')
+@click.option('--scan_min', type=float, default=None,
+              help='Minimum scan value of POI.')
+@click.option('--steps', type=int, default=10, show_default=True,
+              help='Number of scan steps.')
+@click.option('--mu_val', type=float, default=None,
+              help='Value of POI for running a single point.')
+@click.option('-n', '--n_toys', type=int,
+              help='Number of the toys to use.')
+@click.option('-b', '--batchsize', type=int, default=50, show_default=True,
+              help='Divide the task into batches each containing this number of toys. '
+                   'Result from each batch is saved for caching and different batches '
+                   'are run in parallel if needed.')
+@click.option('-s', '--seed', type=int, default=2021, show_default=True,
+              help='Random seed used for generating toy datasets.')
+@click.option('-t', '--tolerance', type=float, default=1., show_default=True,
+              help='Tolerance for minimization.')
+@click.option('-p', '--poi', 'poi_name', default=None,
+              help='Name of the parameter of interest (POI). If None, the first POI is used.')
+@click.option('--minimizer_type', default="Minuit2", show_default=True,
+              help='Minimizer type.')
+@click.option('--strategy', type=int, default=1, show_default=True,
+              help='Default minimization strategy.')
+@click.option('--offset/--no-offset', default=True, show_default=True,
+              help='Use NLL offset.')
+@click.option('--print_level', type=int, default=-1, show_default=True,
+              help='Minimizer print level.')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
+@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
+              help='Parameters to fix.')
+@click.option('-r', '--profile', 'profile_param', default="", show_default=True,
+              help='Parameters to profile.')
+@click.option('--snapshot', 'snapshot_name', default=None, help='Name of initial snapshot.')
+@click.option('--parallel', type=int, default=-1, show_default=True,
+              help='\b\n Parallelize job across the N workers.'
+                   '\b\n Case  0: Jobs are run sequentially (for debugging).'
+                   '\b\n Case -1: Jobs are run across N_CPU workers.')
+def toy_limit(**kwargs):
+    """
+    Generate toys and evaluate limits
+    """
+    from quickstats.components.toy_limit_calculator import evaluate_batched_toy_limits
+    if not (((kwargs['scan_min'] is None) and (kwargs['scan_max'] is None) and (kwargs['mu_val'] is not None)) or \
+            ((kwargs['scan_min'] is not None) and (kwargs['scan_max'] is not None) and (kwargs['mu_val'] is None))):
+        raise ValueError("please provide either (scan_min, scan_max, steps) for running a scan or (mu_val)"
+                         " for running a single point")
+    evaluate_batched_toy_limits(**kwargs)
\ No newline at end of file
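toy_limit runs in exactly one of two modes, enforced by the ValueError above; example invocations with a hypothetical workspace file:

# Scan mode: scan_min and scan_max set, mu_val unset
#   quickstats toy_limit -i ws.root --scan_min 0 --scan_max 10 --steps 10 -n 500
# Single-point mode: mu_val set, scan_min/scan_max unset
#   quickstats toy_limit -i ws.root --mu_val 1.0 -n 500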
diff --git a/quickstats/clis/workspace_tools.py b/quickstats/clis/workspace_tools.py
index e5ffe18f74646b5272a50e557d321e9b75d29516..47d8be470e9aa59e3d29a60fe645eefdd3542d18 100644
--- a/quickstats/clis/workspace_tools.py
+++ b/quickstats/clis/workspace_tools.py
@@ -1,13 +1,49 @@
 import os
 import click
 
-from .core import DelimitedStr
+from .core import DelimitedStr, cli
 
-@click.command(name='build_xml_ws')
+__all__ = ['inspect_ws', 'compare_ws', 'build_xml_ws', 'modify_ws', 'combine_ws', 'decompose_ws']
+
+kItemChoices = ['workspace', 'dataset', 'snapshot', 'category', 'poi',
+                'detailed_nuisance_parameter', 'nuisance_parameter',
+                'global_observable', 'auxiliary']
+kDefaultItems = ",".join(['workspace', 'dataset', 'snapshot', 'category',
+                          'poi', 'detailed_nuisance_parameter'])
+
+@cli.command(name='inspect_ws')
+@click.option('-i', '--input_file', required=True, help='Path to the input workspace file.')
+@click.option('-w', '--workspace', 'ws_name', default=None, help='Name of workspace. Auto-detect by default.')
+@click.option('-d', '--dataset', 'data_name', default=None, help='Name of dataset. Generally not needed.')
+@click.option('-m', '--model_config', 'mc_name', default=None, help='Name of model config. Auto-detect by default.')
+@click.option('-o', '--output_file', default=None, help='Export output to a text file. If None, no output is saved.')
+@click.option('--items', cls=DelimitedStr, type=click.Choice(kItemChoices), show_default=True,
+              default=kDefaultItems, help='Items to include in the summary (separated by commas).')
+@click.option('--include', 'include_patterns', default=None,
+              help='Match variable names with given patterns (separated by commas).')
+@click.option('--exclude', 'exclude_patterns', default=None,
+              help='Exclude variable names with given patterns (separated by commas).')
+@click.option('--detailed/--name-only', default=True, show_default=True,
+              help='Include detailed variable properties or just the variable name in the summary.')
+def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_file=None, items=None,
+               include_patterns=None, exclude_patterns=None, detailed=True):
+    '''
+    Inspect workspace attributes
+    '''
+    from quickstats.components import ExtendedModel
+    model = ExtendedModel(input_file, ws_name=ws_name, mc_name=mc_name, data_name=data_name,
+                          verbosity="WARNING")
+    from quickstats.utils.string_utils import split_str
+    include_patterns = split_str(include_patterns, ',') if include_patterns is not None else None
+    exclude_patterns = split_str(exclude_patterns, ',') if exclude_patterns is not None else None
+    model.stdout.verbosity = "INFO"
+    model.print_summary(items=items, save_as=output_file, detailed=detailed,
+                        include_patterns=include_patterns, exclude_patterns=exclude_patterns)
+
+@cli.command(name='build_xml_ws')
 @click.option('-i', '--filename', 'source', required=True,
               help='Input xml file.')
-@click.option('--binned/--unbinned', 'use_binned', default=False, show_default=True,
-              help='Fit to binned data.')
 @click.option('--data_storage_type', default="vector", show_default=True,
               type=click.Choice(['vector', 'tree', 'composite']),
               help='Set RooAbsData StorageType. Available choices: "vector", "tree", "composite".')
@@ -15,35 +51,34 @@ from .core import DelimitedStr
               help='Base directory to which files in the xmls are referenced. 
' 'By default, the directory of the input xml file is used.') @click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') + help='Minimizer type.') @click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') + help='Minimizer algorithm.') +@click.option('--strategy', type=int, default=1, show_default=True, + help='Default minimization strategy.') @click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') + help='Minimization convergence criterium.') @click.option('--retry', type=int, default=1, show_default=True, - help='Maximum number of retries upon a failed fit') -@click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations') + help='Maximum number of retries upon a failed fit.') @click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms') + help='Optimize constant terms.') +@click.option('--minimizer_offset', type=int, default=1, show_default=True, + help='Enable minimizer offsetting.') @click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood') + help='Offset likelihood.') +@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, + help='Number of CPUs to use during minimization.') @click.option('--apply-fix/--do-not-apply-fix', default=False, show_default=True, - help='Apply a fix on the up/down uncertainty implementation') -@click.option('-v', '--verbosity', default="INFO", show_default=True, - help='Verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")') + help='Apply a fix on the up/down uncertainty implementation.') +@click.option('--extra_minimizer_options', default=None, show_default=True, + help='Additional minimizer options to include. Format should be <config>=<value> ' + 'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"') +@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True, + help='CMS specific runtime definitions. Format should be <config>=<value> ' + 'separated by commas. 
Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') @click.option('--version', type=int, default=2, show_default=True, help='Version of XMLWSBuilder to use (Choose between 1 and 2).') def build_xml_ws(**kwargs): @@ -63,7 +98,7 @@ def build_xml_ws(**kwargs): builder = XMLWSBuilder(**_kwargs) builder.generate_workspace() -@click.command(name='modify_ws') +@cli.command(name='modify_ws') @click.option('-i', '--filename', 'source', required=True, help='Input xml/json file.') @click.option('--input_workspace', @@ -72,35 +107,34 @@ def build_xml_ws(**kwargs): help='Override output workspace path from the xml/json file.') @click.option('--import-class-code/--no-import-class-code', 'import_class_code', default=True, show_default=True, - help='Import class code') + help='Import class code.') @click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') + help='Minimizer type.') @click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') -@click.option('--retry', type=int, default=0, show_default=True, - help='Maximum number of retries upon a failed fit') + help='Minimizer algorithm.') @click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--max_calls', type=int, default=-1, show_default=True, - help='Maximum number of function calls') -@click.option('--max_iters', type=int, default=-1, show_default=True, - help='Maximum number of Minuit iterations') + help='Default minimization strategy.') +@click.option('-e', '--eps', type=float, default=1.0, show_default=True, + help='Minimization convergence criterium.') +@click.option('--retry', type=int, default=1, show_default=True, + help='Maximum number of retries upon a failed fit.') @click.option('--optimize', type=int, default=2, show_default=True, - help='Optimize constant terms') + help='Optimize constant terms.') +@click.option('--minimizer_offset', type=int, default=1, show_default=True, + help='Enable minimizer offsetting.') @click.option('--offset/--no-offset', default=True, show_default=True, - help='Offset likelihood') -@click.option('-v', '--verbosity', default="INFO", show_default=True, - help='verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")') + help='Offset likelihood.') +@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, + help='Number of CPUs to use during minimization.') +@click.option('--extra_minimizer_options', default=None, show_default=True, + help='Additional minimizer options to include. Format should be <config>=<value> ' + 'separated by commas. 
Example: "discrete_min_tol=0.001,do_discrete_iteration=1"') +@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True, + help='CMS specific runtime definitions. Format should be <config>=<value> ' + 'separated by commas. Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') def modify_ws(**kwargs): """ Modify workspace from XML/json config files @@ -128,7 +162,7 @@ kDefaultVisibility = ",".join(["workspace=0b11001", "category=0b10011", "snapsho "dataset=0b11011", "pdf=0b01011", "function=0b01011" , "poi=0b01011", "nuisance_parameter=0b01011", "global_observable=0b01011", "auxiliary=0b01011"]) -@click.command(name='compare_ws') +@cli.command(name='compare_ws') @click.option('-l', '--left', required=True, help='Path to the input workspace file (left of comparison).') @click.option('-r', '--right', required=True, @@ -153,8 +187,9 @@ kDefaultVisibility = ",".join(["workspace=0b11001", "category=0b10011", "snapsho help='Save comparison data as a json file.') @click.option('--save_excel_data', default=None, help='Save comparison data as an excel file.') -@click.option('-v', '--verbosity', default="INFO", show_default=True, - help='verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")') +@click.option('-v', '--verbosity', default='INFO', show_default=True, + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + help='Verbosity level.') def compare_ws(**kwargs): """ Compare two workspace files @@ -178,7 +213,7 @@ def compare_ws(**kwargs): comparer.save_excel(save_excel_data) -@click.command(name='combine_ws') +@cli.command(name='combine_ws') @click.option('-i', '--filename', 'source', required=True, help='Input xml file.') @click.option('--input_workspace', @@ -186,40 +221,39 @@ def compare_ws(**kwargs): @click.option('--output_workspace', help='Override output workspace path from the xml file.') @click.option('--save_rename_ws/--skip_rename_ws', default=False, show_default=True, - help='Save a temporary workspace after the rename step') + help='Save a temporary workspace after the rename step.') @click.option('--save_combine_ws/--skip_combine_ws', default=False, show_default=True, - help='Save a temporary workspace after the combine step') + help='Save a temporary workspace after the combine step.') @click.option('-t', '--minimizer_type', default="Minuit2", show_default=True, - help='Minimizer type') + help='Minimizer type.') @click.option('-a', '--minimizer_algo', default="Migrad", show_default=True, - help='Minimizer algorithm') -@click.option('-c', '--num_cpu', type=int, default=1, show_default=True, - help='Number of CPUs to use per parameter') -@click.option('-e', '--eps', type=float, default=1.0, show_default=True, - help='Convergence criterium') -@click.option('--retry', type=int, default=0, show_default=True, - help='Maximum number of retries upon a failed fit') + help='Minimizer algorithm.') @click.option('--strategy', type=int, default=1, show_default=True, - help='Default minimization strategy') -@click.option('--print_level', type=int, default=-1, show_default=True, - help='Minimizer print level') -@click.option('--fix-cache/--no-fix-cache', default=True, show_default=True, - help='Fix StarMomentMorph cache') -@click.option('--fix-multi/--no-fix-cache', default=True, show_default=True, - help='Fix MultiPdf level 2') -@click.option('--max_calls', 
type=int, default=-1, show_default=True,
-              help='Maximum number of function calls')
-@click.option('--max_iters', type=int, default=-1, show_default=True,
-              help='Maximum number of Minuit iterations')
+              help='Default minimization strategy.')
+@click.option('-e', '--eps', type=float, default=1.0, show_default=True,
+              help='Minimization convergence criterium.')
+@click.option('--retry', type=int, default=1, show_default=True,
+              help='Maximum number of retries upon a failed fit.')
 @click.option('--optimize', type=int, default=2, show_default=True,
-              help='Optimize constant terms')
+              help='Optimize constant terms.')
+@click.option('--minimizer_offset', type=int, default=1, show_default=True,
+              help='Enable minimizer offsetting.')
 @click.option('--offset/--no-offset', default=True, show_default=True,
-              help='Offset likelihood')
+              help='Offset likelihood.')
+@click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
+              help='Number of CPUs to use during minimization.')
 @click.option('--import-class-code/--no-import-class-code', 'import_class_code',
               default=True, show_default=True,
-              help='Import class code')
-@click.option('-v', '--verbosity', default="INFO", show_default=True,
-              help='verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")')
+              help='Import class code.')
+@click.option('--extra_minimizer_options', default=None, show_default=True,
+              help='Additional minimizer options to include. Format should be <config>=<value> '
+                   'separated by commas. Example: "discrete_min_tol=0.001,do_discrete_iteration=1"')
+@click.option('--cms_runtimedef', 'runtimedef_expr', default=None, show_default=True,
+              help='CMS specific runtime definitions. Format should be <config>=<value> '
+                   'separated by commas. Example: "REMOVE_CONSTANT_ZERO_POINT=1,ADDNLL_GAUSSNLL=0"')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
 def combine_ws(**kwargs):
     """
     Combine workspace from XML config files
@@ -253,31 +287,32 @@ def compare_ws(**kwargs):
                        import_class_code=import_class_code)
 
-@click.command(name='decompose_ws')
+@cli.command(name='decompose_ws')
 @click.option('-i', '--infile', required=True,
-              help='Path to the input workspace file')
+              help='Path to the input workspace file.')
 @click.option('-o', '--outfile', required=True,
-              help='Path to the output workspace file')
+              help='Path to the output workspace file.')
 @click.option('--import-class-code/--no-import-class-code', 'import_class_code',
               default=True, show_default=True,
-              help='Import class code')
+              help='Import class code.')
 @click.option('-c', '--category_expr', default='*', show_default=True,
               help='Categories to keep in the decomposed workspace (separated by commas). '
                    'Both category index and category label can be used. Category index can be '
                    'a single number or a range "<min_index>-<max_index>". 
Wildcard is supported '
                    'for category labels.')
-@click.option('-v', '--verbosity', default="INFO", show_default=True,
-              help='verbosity level ("DEBUG", "INFO", "WARNING", "ERROR")')
 @click.option('--snapshots', 'snapshots_to_save', default=None, show_default=True,
               help='Snapshots to save (separated by commas)'
-                   'By default, all existing snapshots will be saved')
+                   '. By default, all existing snapshots will be saved.')
 @click.option('--rebuild-nuis/--no-rebuild-nuis', default=False, show_default=True,
-              help='Whether to rebuild the nuisance parameter set')
+              help='Whether to rebuild the nuisance parameter set.')
 @click.option('--rebuild-pdf/--no-rebuild-pdf', default=False, show_default=True,
-              help='Whether to rebuild category pdfs')
+              help='Whether to rebuild category pdfs.')
+@click.option('-v', '--verbosity', default='INFO', show_default=True,
+              type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
+              help='Verbosity level.')
 def decompose_ws(**kwargs):
     """
-    Decompose workspace into subcategories.
+    Decompose workspace into subcategories
     """
     init_kwargs = {}
     for key in ['verbosity']:
diff --git a/quickstats/components/__init__.py b/quickstats/components/__init__.py
index 9ad30f2c1219faeafcc756ca29be2961c6acff38..ba4e6c902be8601567efe8f1a0ce544bc98ca6bb 100644
--- a/quickstats/components/__init__.py
+++ b/quickstats/components/__init__.py
@@ -1,5 +1,6 @@
 from .basics import *
 from .root_object import ROOTObject
+from .discrete_nuisance import DiscreteNuisance
 from .extended_minimizer import ExtendedMinimizer
 from .extended_model import ExtendedModel
 from .analysis_object import AnalysisObject
@@ -15,6 +16,7 @@ from .extended_rfile import ExtendedRFile
 from .nuisance_parameter_pull import NuisanceParameterPull
 from .nuisance_parameter_ranking import NuisanceParameterRanking
 from .nuisance_parameter_harmonizer import NuisanceParameterHarmonizer
+from .caching_nll_wrapper import CachingNLLWrapper
 #from .signal_modelling import SignalModelling
 import ROOT
diff --git a/quickstats/components/analysis_object.py b/quickstats/components/analysis_object.py
index e76ec9a457388c6eb963bb8c3a5faa243b758f3d..f93d420b9ee23c1a4e0dcae2bd5a02e083758f68 100644
--- a/quickstats/components/analysis_object.py
+++ b/quickstats/components/analysis_object.py
@@ -1,11 +1,13 @@
-from typing import Optional, Union, List
+from typing import Optional, Union, List, Dict
+
+import ROOT
 
 from quickstats import AbstractObject
 from quickstats.components import ExtendedModel, ExtendedMinimizer
 from quickstats.components.basics import WSArgument
-from quickstats.utils.common_utils import get_class_that_defined_method
-from quickstats.interface.root import RooArgSet
-import ROOT
+from quickstats.utils.common_utils import get_class_that_defined_method, update_config_dict, combine_dict
+from quickstats.utils.roostats_utils import set_prior_pdf
+from quickstats.interface.root import RooArgSet, ModelConfig
 
 class AnalysisObject(AbstractObject):
@@ -18,12 +20,13 @@ class AnalysisObject(AbstractObject):
                  fix_param:str='', profile_param:str='', ws_name:Optional[str]=None,
                  mc_name:Optional[str]=None, snapshot_name:Optional[Union[List[str], str]]=None,
                  minimizer_type:str='Minuit2', minimizer_algo:str='Migrad', precision:float=0.001,
-                 eps:float=1.0, retry:int=1, strategy:int=1, print_level:int=-1, timer:bool=False,
-                 num_cpu:int=1, offset:bool=True, optimize:int=2, eigen:bool=False, hesse:bool=False,
-                 improve:int=0, fix_cache:bool=True, fix_multi:bool=True, max_calls:int=-1,
-                 max_iters:int=-1, 
constrain_nuis:bool=True, batch_mode:bool=False, + eps:float=1.0, retry:int=1, strategy:int=1, print_level:int=-1, + num_cpu:int=1, offset:bool=True, optimize:int=2, constrain_nuis:bool=True, + batch_mode:bool=False, prior_type:Optional[str]=None, runtimedef_expr:Optional[str]=None, int_bin_precision:float=-1., preset_param:bool=False, minimizer_offset:int=1, - minimizer_cls=None, verbosity:Optional[Union[int, str]]="INFO"): + extra_minimizer_options:Optional[Union[Dict, str]]=None, + minimizer_cls=None, verbosity:Optional[Union[int, str]]="INFO", + **kwargs): super().__init__(verbosity=verbosity) self.model = None self.minimizer = None @@ -39,52 +42,74 @@ class AnalysisObject(AbstractObject): "mc_name" : mc_name, "data_name" : data_name, "snapshot_name" : snapshot_name, - "binned_likelihood" : binned_likelihood, - "fix_cache" : fix_cache, - "fix_multi" : fix_multi + "binned_likelihood" : binned_likelihood } self.setup_model(**model_options) self.model._load_floating_auxiliary_variables() self.save_snapshot(self.kInitialSnapshotName, WSArgument.MUTABLE) - self.set_poi(poi_name) - if preset_param: - self.preset_parameters() - self.setup_parameters(fix_param, profile_param, update_snapshot=True) minimizer_options = { + "constrain_nuis" : constrain_nuis, "minimizer_type" : minimizer_type, "minimizer_algo" : minimizer_algo, "precision" : precision, "eps" : eps, - "retry" : retry, "strategy" : strategy, - "num_cpu" : num_cpu, - "offset" : offset, "minimizer_offset" : minimizer_offset, + "offset" : offset, "optimize" : optimize, - "eigen" : eigen, - "hesse" : hesse, - "improve" : improve, - "max_calls" : max_calls, - "max_iters" : max_iters, + "retry" : retry, + "num_cpu" : num_cpu, "print_level" : print_level, - "timer" : timer, - "constrain_nuis" : constrain_nuis, "batch_mode" : batch_mode, "int_bin_precision" : int_bin_precision, + "runtimedef_expr" : runtimedef_expr, "verbosity" : verbosity } - self.setup_minimizer(**minimizer_options) + if kwargs: + minimizer_options = combine_dict(minimizer_options, kwargs) + minimizer_options = update_config_dict(minimizer_options, extra_minimizer_options) + self.setup_minimizer(discrete_nuisance=self.model._discrete_nuisance, + **minimizer_options) + self.set_poi(poi_name) + if preset_param: + self.preset_parameters() + self.setup_parameters(fix_param, profile_param, update_snapshot=True) + set_prior_pdf(self.model.workspace, + self.model.model_config, + self.poi, prior_type) self.sanity_check() + def sanity_check(self): - aux_vars = self.model.get_variables("auxiliary") - floating_aux_vars = [v.GetName() for v in aux_vars if not v.isConstant()] - if len(floating_aux_vars) > 0: - self.stdout.warning("The following auxiliary variables (variables that are not " - "part of the POIs, observables, nuisance parameters and global " - f"observables) are floating: {','.join(floating_aux_vars)}. 
If this is " - "not intended, please make sure to fix them before fitting.", "red") - + + def check_aux_vars_are_fixed(): + aux_vars = self.model.get_variables("auxiliary") + ROOT.RooStats.RemoveConstantParameters(aux_vars) + if self.model.has_discrete_nuisance(): + aux_vars = RooArgSet.get_set_difference(aux_vars, self.model.discrete_nuisance.multipdf_params) + aux_vars = RooArgSet.get_set_difference(aux_vars, ROOT.RooArgSet(self.model.discrete_nuisance.multipdf_cats)) + if len(aux_vars) > 0: + aux_var_names = [v.GetName() for v in aux_vars] + self.stdout.warning("The following auxiliary variables (variables that are not " + "part of the POIs, observables, nuisance parameters, global " + "observables and discrete nuisances) are floating: " + f"{','.join(aux_var_names)}. If this is not intended, please " + "make sure to fix them before fitting.", "red") + passed = len(aux_vars) == 0 + return passed + + def check_model_config(): + passed = ModelConfig(verbosity=self.stdout.verbosity).sanity_check(self.model.model_config) + return passed + + for task_name, task in [('Auxiliary variable', check_aux_vars_are_fixed), + ('Model config', check_model_config)]: + passed = task() + if passed: + self.stdout.info(f"{task_name} sanity check: PASSED") + else: + self.stdout.info(f"{task_name} sanity check: FAILED") + def preset_parameters(self, fix_pois:bool=True, fix_globs:bool=True, float_nuis:bool=False): if fix_pois: ROOT.RooStats.SetAllConstant(self.model.pois, True) @@ -111,6 +136,10 @@ class AnalysisObject(AbstractObject): @property def get_poi(self): return self.model.get_poi + + @property + def poi(self): + return self.minimizer._poi # black magic def _inherit_init(self, init_func, **kwargs): @@ -126,23 +155,22 @@ class AnalysisObject(AbstractObject): that_kwargs = {k:v for k,v in kwargs.items() if k in that_parameters} this_kwargs = {k:v for k,v in kwargs.items() if k not in that_parameters} init_func(config=this_kwargs, **that_kwargs) - - def set_poi(self, poi_name:Optional[Union[str, List[str]]]=None): - pois = self.get_poi(poi_name) + + def set_poi(self, pois:Optional[Union[str, List[str], "ROOT.RooRealVar", "ROOT.RooArgSet"]]=None): + if (pois is not None) and not isinstance(pois, (ROOT.RooRealVar, ROOT.RooArgSet)): + pois = self.get_poi(pois) + self.minimizer.set_poi(pois) if isinstance(pois, ROOT.RooRealVar): - poi_text = f'"{pois.GetName()}"' - n_poi = 1 - elif isinstance(pois, ROOT.RooArgSet) and (len(pois) > 0): - poi_text = ", ".join([f'"{poi.GetName()}"' for poi in pois]) - n_poi = len(pois) + poi_names = [pois.GetName()] + elif isinstance(pois, ROOT.RooArgSet): + poi_names = [poi.GetName() for poi in pois] else: - poi_text = None - n_poi = 0 - if n_poi == 1: - self.stdout.info(f'POI set to {poi_text}') - elif n_poi > 1: - self.stdout.info(f'POIs set to {poi_text}') - self.poi = pois + poi_names = [] + if len(poi_names) == 1: + self.stdout.info(f'POI set to "{poi_names[0]}"') + elif len(poi_names) > 1: + text = ", ".join([f'"{name}"' for name in poi_names]) + self.stdout.info(f'POIs set to {text}') def setup_model(self, **kwargs): model = ExtendedModel(**kwargs, verbosity=self.stdout.verbosity) @@ -196,11 +224,14 @@ class AnalysisObject(AbstractObject): f"POI used in this study. 
This parameter will still be floated during " f"unconditional likelihood fit / limit setting.", "red") - def setup_minimizer(self, constrain_nuis:bool=True, **kwargs): - + def setup_minimizer(self, constrain_nuis:bool=True, + discrete_nuisance:Optional["DiscreteNuisance"]=None, + runtimedef_expr:Optional[str]=None, **kwargs): minimizer = self.minimizer_cls("Minimizer", self.model.pdf, self.model.data, workspace=self.model.workspace, - verbosity=self.stdout.verbosity) + runtimedef_expr=runtimedef_expr, + verbosity=self.stdout.verbosity, + discrete_nuisance=discrete_nuisance) nll_options = {k:v for k,v in kwargs.items() if k in ExtendedMinimizer._DEFAULT_NLL_OPTION_} @@ -216,13 +247,17 @@ class AnalysisObject(AbstractObject): minimizer.configure_nll(**nll_options) minimizer.configure(**minimizer_options) self.minimizer = minimizer + self.model.set_minimizer(self.minimizer) self.default_nll_options = nll_options self.default_minimizer_options = minimizer_options def set_data(self, data_name:str='combData'): - data = self.model.workspace.data(data_name) - if not data: - raise RuntimeError(f'workspace does not contain the dataset "{data_name}"') + if isinstance(data_name, ROOT.RooDataSet): + data = data_name + else: + data = self.model.workspace.data(data_name) + if not data: + raise RuntimeError(f'workspace does not contain the dataset "{data_name}"') self.minimizer.set_data(data) self.model._data = data diff --git a/quickstats/components/asymptotic_cls.py b/quickstats/components/asymptotic_cls.py index 5150f040f10cfdc2c4c31ceace736a2b80a3ed7c..2f0ee9e87552743125d016c3a6f75721324e29d1 100644 --- a/quickstats/components/asymptotic_cls.py +++ b/quickstats/components/asymptotic_cls.py @@ -33,13 +33,11 @@ class AsymptoticCLs(AnalysisObject): better_negative_bands:bool=False, do_blind:bool=True, binned_likelihood:bool=True, fix_param:str='', profile_param:str='', ws_name:Optional[str]=None, mc_name:Optional[str]=None, - snapshot_name:Optional[str]=None, timer:bool=False, + snapshot_name:Optional[str]=None, constrain_nuis:bool=True, minimizer_type:str='Minuit2', minimizer_algo:str='Migrad', eps:float=1.0, retry:int=2, strategy:int=1, print_level:int=-1, - num_cpu:int=1, offset:bool=True, optimize:int=2, - fix_cache:bool=True, fix_multi:bool=True, max_calls:int=-1, - max_iters:int=-1, constrain_nuis:bool=True, batch_mode:bool=False, - int_bin_precision:float=-1., improve:int=0, minimizer_offset:int=1, + num_cpu:int=1, minimizer_offset:int=1, offset:bool=True, + optimize:int=2, batch_mode:bool=False, int_bin_precision:float=-1., verbosity:Optional[Union[int, str]]="INFO", **kwargs): """ args: @@ -64,24 +62,18 @@ class AsymptoticCLs(AnalysisObject): 'retry': retry, 'strategy': strategy, 'num_cpu': num_cpu, + 'minimizer_offset': minimizer_offset, 'offset': offset, 'optimize': optimize, - 'improve': improve, - 'minimizer_offset': minimizer_offset, - 'eigen': False, 'print_level': print_level, - 'timer': timer, - 'fix_cache': fix_cache, - 'fix_multi': fix_multi, - 'max_calls': max_calls, - 'max_iters': max_iters, 'constrain_nuis': constrain_nuis, 'batch_mode': batch_mode, 'int_bin_precision': int_bin_precision, 'preset_param': True, - 'verbosity': verbosity + 'verbosity': verbosity, + **kwargs } - super().__init__(**config) + self._inherit_init(super().__init__, **config) self.do_blind = do_blind self.CL = CL self.precision = precision @@ -98,20 +90,8 @@ class AsymptoticCLs(AnalysisObject): self.stdout.info(f"Changed mu_guess to {mu_guess} so that it is different from mu_exp") self.mu_guess = mu_guess - # 
define asimov data - if asimov_data_name is not None: - asimov_data_0 = self.model.workspace.data(asimov_data_name) - if not asimov_data_0: - raise RuntimeError(f'failed to load dataset \"{asimov_data_name}\"') - self.asimov_data_0 = asimov_data_0 - glob_snap_name = self.get_conditional_snapsot_name(0, "global_observable") - nuis_snap_name = self.get_conditional_snapsot_name(0, "nuisance_parameter") - if (not self.model.workspace.loadSnapshot(glob_snap_name)) or \ - (not self.model.workspace.loadSnapshot(nuis_snap_name)): - raise RuntimeError("when using user-defined background-only Asimov data, " - f"snapshots {nuis_snap_name} and {glob_snap_name} must be defined.") - else: - self.asimov_data_0 = None + # use custom bkg asimov data if specified + self.load_asimov_data_0(asimov_data_name) # define simplified names self.ws = self.model.workspace @@ -138,22 +118,31 @@ class AsymptoticCLs(AnalysisObject): self.stdout.info('Loaded extension module "AsymptoticCLsTool"') except Exception as e: print(e) - - def set_poi(self, poi_name:Optional[str]=None): - if poi_name is None: - self.poi = self.model.pois.first() - self.stdout.info('POI name not given. Set to first poi "{}" by default.'.format(self.poi.GetName())) - else: - self.poi = self.model.get_poi(poi_name) - self.stdout.info('POI set to "{}"'.format(poi_name)) + + def load_asimov_data_0(self, asimov_data_name:Optional[str]=None): + if asimov_data_name is None: + self.asimov_data_0 = None + return None + asimov_data_0 = self.model.workspace.data(asimov_data_name) + if not asimov_data_0: + raise RuntimeError(f'failed to load dataset "{asimov_data_name}"') + # check that conditional global observable and nuisance parameter snapshots are defined + glob_snap_name = self.get_conditional_snapsot_name(0, "global_observable") + nuis_snap_name = self.get_conditional_snapsot_name(0, "nuisance_parameter") + glob_snap = self.model.workspace.getSnapshot(glob_snap_name) + nuis_snap = self.model.workspace.getSnapshot(nuis_snap_name) + if ((not glob_snap) or (not nuis_snap)): + raise RuntimeError("when using user-defined background-only Asimov data, " + f"snapshots {nuis_snap_name} and {glob_snap_name} must be defined.") + self.asimov_data_0 = asimov_data_0 def set_poi_value(self, val:float): if math.isnan(val): raise ValueError("cannot set poi to nan") if ( val > 0 ) and (self.poi.getMax() < val): - self.poi.setMax(2*val) + self.poi.setMax(2 * val) if ( val < 0 ) and (self.poi.getMin() > val): - self.poi.setMin(2*val) + self.poi.setMin(2 * val) self.poi.setVal(val) def save_nll_snapshot(self, nll, mu): @@ -186,16 +175,10 @@ class AsymptoticCLs(AnalysisObject): self.load_snapshot(snapshot_name) else: raise RuntimeError(f'Failed to load snapshot for nll "{nll.GetName()}"') - if self.stdout.verbosity <= "DEBUG": - self.stdout.debug("Before Fit ----------------------------------------------") - self.model.print_summary(items=['poi', 'nuisance_parameter', 'global_observable']) - self.last_fit_status = self.minimizer.minimize(nll) - self.global_status += self.last_fit_status + self.last_fit_status = self.minimizer.minimize(nll) + self.global_status += self.last_fit_status self.minimizer_calls += 1 nll_val = nll.getVal() - if self.stdout.verbosity <= "DEBUG": - self.stdout.debug("After Fit ----------------------------------------------") - self.model.print_summary(items=['poi', 'nuisance_parameter', 'global_observable']) self.load_snapshot(self.nom_glob_name) return nll_val @@ -205,7 +188,7 @@ class AsymptoticCLs(AnalysisObject): def get_qmu(self, 
nll:"ROOT.RooNLLVar", mu:float): nll_muhat = self.nll_maps[nll].get('nll_mu_hat', None) if nll_muhat is None: - raise RuntimeError('Failed to get nominal nll for "{}"'.format(nll.GetName())) + raise RuntimeError(f'Failed to get nominal nll for "{nll.GetName()}"') is_const = self.poi.isConstant() self.poi.setConstant(1) self.set_poi_value(mu) @@ -251,10 +234,13 @@ class AsymptoticCLs(AnalysisObject): self.stdout.info("----------------------------------", bare=True) self.stdout.info(f"Getting limit for nll: {nll_name}", bare=True) self.poi.setConstant(0) + # load mu_exp asimov asimov_0_nll = self.summary[0]['nll'] + if nll == asimov_0_nll: self.set_poi_value(self.mu_exp) self.poi.setConstant(1) + # case for observed limit if (nll not in self.nll_maps) or ('nll_mu_hat' not in self.nll_maps[nll]): nll_val = self.get_nll_val(nll) @@ -402,9 +388,11 @@ class AsymptoticCLs(AnalysisObject): @staticmethod def get_approx_limits(median_limit:float, target_cls:float=0.05): limits = {} + # 3.84 = value of delta chi^2 (qmu) corresponding to a coverage probability 1 − alpha = 0.95 in the large data sample limit sigma = median_limit/math.sqrt(3.84) limits[0] = median_limit limits[2] = sigma*(ROOT.Math.gaussian_quantile(1 - target_cls*ROOT.Math.gaussian_cdf( 2), 1) + 2) + limits[1] = sigma*(ROOT.Math.gaussian_quantile(1 - target_cls*ROOT.Math.gaussian_cdf( 1), 1) + 1) limits[-1] = sigma*(ROOT.Math.gaussian_quantile(1 - target_cls*ROOT.Math.gaussian_cdf(-1), 1) - 1) limits[-2] = sigma*(ROOT.Math.gaussian_quantile(1 - target_cls*ROOT.Math.gaussian_cdf(-2), 1) - 2) @@ -431,55 +419,7 @@ class AsymptoticCLs(AnalysisObject): self.nll_maps[nll]['mu_hat'] = mu_hat self.nll_maps[nll]['nll_mu_hat'] = nll_mu_hat self.dataset_maps[dataset] = nll - - def print_failures(self): - has_failtures = any(self.summary[sigma].get('status', 0) > 0 for sigma in self.summary) - if has_failtures: - self.stdout.info('-----------------------------------------------', bare=True) - self.stdout.info('Unresolved fit failures detected', bare=True) - for sigma in self.summary: - label = self._SIGMA_LABEL_.get(sigma, sigma) - self.stdout.info("{}:".format(label).ljust(10) + str(self.summary[sigma].get('status', None)), bare=True) - self.stdout.info('-----------------------------------------------', bare=True) - def print_summary(self): - if (not self.limits) or (not self.approx_limits): - self.stdout.warning("No limits evaluated") - return None - self.stdout.info('', bare=True) - self.print_failures() - if self.do_better_bands: - self.stdout.info('Guess for bands', bare=True) - self.stdout.info(f'+2sigma: {self.approx_limits[2]}', bare=True) - self.stdout.info(f'+1sigma: {self.approx_limits[1]}', bare=True) - self.stdout.info(f'-1sigma: {self.approx_limits[-1]}', bare=True) - self.stdout.info(f'-2sigma: {self.approx_limits[-2]}', bare=True) - if self.do_better_bands: - self.stdout.info('\nCorrect bands', bare=True) - self.stdout.info(f'+2sigma: {self.limits[2]}', bare=True) - self.stdout.info(f'+1sigma: {self.limits[1]}', bare=True) - self.stdout.info(f'-1sigma: {self.limits[-1]}', bare=True) - self.stdout.info(f'-2sigma: {self.limits[-2]}', bare=True) - self.stdout.info('Injected: {}'.format(self.limits['inj']), bare=True) - self.stdout.info(f'Median: {self.limits[0]}', bare=True) - self.stdout.info('Observed: {}'.format(self.limits['obs']), bare=True) - self.stdout.info('', bare=True) - - def save(self, filename:str='limits.json', parameters:Optional[Dict]=None, summary:bool=False): - with open(filename, "w") as file: - if parameters is 
@@ -431,55 +419,7 @@
         self.nll_maps[nll]['mu_hat'] = mu_hat
         self.nll_maps[nll]['nll_mu_hat'] = nll_mu_hat
         self.dataset_maps[dataset] = nll
-
-    def print_failures(self):
-        has_failtures = any(self.summary[sigma].get('status', 0) > 0 for sigma in self.summary)
-        if has_failtures:
-            self.stdout.info('-----------------------------------------------', bare=True)
-            self.stdout.info('Unresolved fit failures detected', bare=True)
-            for sigma in self.summary:
-                label = self._SIGMA_LABEL_.get(sigma, sigma)
-                self.stdout.info("{}:".format(label).ljust(10) + str(self.summary[sigma].get('status', None)), bare=True)
-            self.stdout.info('-----------------------------------------------', bare=True)
-
-    def print_summary(self):
-        if (not self.limits) or (not self.approx_limits):
-            self.stdout.warning("No limits evaluated")
-            return None
-        self.stdout.info('', bare=True)
-        self.print_failures()
-        if self.do_better_bands:
-            self.stdout.info('Guess for bands', bare=True)
-            self.stdout.info(f'+2sigma: {self.approx_limits[2]}', bare=True)
-            self.stdout.info(f'+1sigma: {self.approx_limits[1]}', bare=True)
-            self.stdout.info(f'-1sigma: {self.approx_limits[-1]}', bare=True)
-            self.stdout.info(f'-2sigma: {self.approx_limits[-2]}', bare=True)
-        if self.do_better_bands:
-            self.stdout.info('\nCorrect bands', bare=True)
-            self.stdout.info(f'+2sigma: {self.limits[2]}', bare=True)
-            self.stdout.info(f'+1sigma: {self.limits[1]}', bare=True)
-            self.stdout.info(f'-1sigma: {self.limits[-1]}', bare=True)
-            self.stdout.info(f'-2sigma: {self.limits[-2]}', bare=True)
-        self.stdout.info('Injected: {}'.format(self.limits['inj']), bare=True)
-        self.stdout.info(f'Median: {self.limits[0]}', bare=True)
-        self.stdout.info('Observed: {}'.format(self.limits['obs']), bare=True)
-        self.stdout.info('', bare=True)
-
-    def save(self, filename:str='limits.json', parameters:Optional[Dict]=None, summary:bool=False):
-        with open(filename, "w") as file:
-            if parameters is None:
-                json.dump(self.limits, file, indent=2)
-            else:
-                json.dump({**parameters, **self.limits}, file, indent=2)
-            file.truncate()
-        if summary:
-            data = {k:{kk:vv for kk,vv in self.summary[k].items() if kk not in ['nll', 'dataset']} \
-                    for k,v in self.summary.items()}
-            summary_filename = os.path.splitext(filename)[0]+'_summary'+os.path.splitext(filename)[1]
-            with open(summary_filename, "w") as file:
-                json.dump(data, file, indent=2)
-                file.truncate()
-
     def evaluate_limit_band(self, n:int, med_limit:float):
         init_target_cls = self.LimitTool.getTargetCLs()
         self.LimitTool.setTargetCLs(2*(1 - ROOT.Math.gaussian_cdf(abs(n))))
@@ -550,11 +490,14 @@
         self.save_nll_snapshot(asimov_0_nll, self.mu_exp)
         self.load_conditional_snapshot(self.mu_exp, target=WSArgument.GLOBAL_OBSERVABLE)
         self.load_conditional_snapshot(self.mu_exp, target=WSArgument.NUISANCE_PARAMETER)
+        if (self.minimizer.has_discrete_nuisance() and self.do_blind):
+            self.minimizer.minimize(asimov_0_nll)
         nll_value = asimov_0_nll.getVal()
         self.fill_mappings(asimov_0_nll, asimov_data_0, self.mu_exp, nll_value)
         self.LimitTool.setTargetCLs(1 - self.CL)
-        
+
+        # evaluate median expected limit
         med_limit = self.get_limit(asimov_0_nll, self.mu_guess, 0)
         approx_limits = self.get_approx_limits(med_limit, self.LimitTool.getTargetCLs())
         limits = approx_limits.copy()
@@ -633,3 +576,51 @@
         self.stdout.info(f"pmu: {pmu}", bare=True)
         self.stdout.info(f"1-pb: {pb}", bare=True)
         self.stdout.info(f"CLs: {CLs}", bare=True)
+
+    def print_failures(self):
+        has_failures = any(self.summary[sigma].get('status', 0) > 0 for sigma in self.summary)
+        if has_failures:
+            self.stdout.info('-----------------------------------------------', bare=True)
+            self.stdout.info('Unresolved fit failures detected', bare=True)
+            for sigma in self.summary:
+                label = self._SIGMA_LABEL_.get(sigma, sigma)
+                self.stdout.info("{}:".format(label).ljust(10) + str(self.summary[sigma].get('status', None)), bare=True)
+            self.stdout.info('-----------------------------------------------', bare=True)
+
+    def print_summary(self):
+        if (not self.limits) or (not self.approx_limits):
+            self.stdout.warning("No limits evaluated")
+            return None
+        self.stdout.info('', bare=True)
+        self.print_failures()
+        if self.do_better_bands:
+            self.stdout.info('Guess for bands', bare=True)
+            self.stdout.info(f'+2sigma: {self.approx_limits[2]}', bare=True)
+            self.stdout.info(f'+1sigma: {self.approx_limits[1]}', bare=True)
+            self.stdout.info(f'-1sigma: {self.approx_limits[-1]}', bare=True)
+            self.stdout.info(f'-2sigma: {self.approx_limits[-2]}', bare=True)
+        if self.do_better_bands:
+            self.stdout.info('\nCorrect bands', bare=True)
+            self.stdout.info(f'+2sigma: {self.limits[2]}', bare=True)
+            self.stdout.info(f'+1sigma: {self.limits[1]}', bare=True)
+            self.stdout.info(f'-1sigma: {self.limits[-1]}', bare=True)
+            self.stdout.info(f'-2sigma: {self.limits[-2]}', bare=True)
+        self.stdout.info('Injected: {}'.format(self.limits['inj']), bare=True)
+        self.stdout.info(f'Median: {self.limits[0]}', bare=True)
+        self.stdout.info('Observed: {}'.format(self.limits['obs']), bare=True)
+        self.stdout.info('', bare=True)
+
+    def save(self, filename:str='limits.json', parameters:Optional[Dict]=None, summary:bool=False):
+        with open(filename, "w") as file:
+            if parameters is None:
+                json.dump(self.limits, file, indent=2)
+            else:
+                json.dump({**parameters, **self.limits}, file, indent=2)
+            file.truncate()
+        if summary:
+            data = {k:{kk:vv for kk,vv in 
self.summary[k].items() if kk not in ['nll', 'dataset']} \
+                    for k,v in self.summary.items()}
+            summary_filename = os.path.splitext(filename)[0]+'_summary'+os.path.splitext(filename)[1]
+            with open(summary_filename, "w") as file:
+                json.dump(data, file, indent=2)
+                file.truncate()
\ No newline at end of file
diff --git a/quickstats/components/caching_nll_wrapper.py b/quickstats/components/caching_nll_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..6be3426fba3e68a7ff02f7be1713d26e7e5b1f95
--- /dev/null
+++ b/quickstats/components/caching_nll_wrapper.py
@@ -0,0 +1,97 @@
+##################################################################################################
+# Based on https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit
+# Author: Alkaid Cheng
+# Email: chi.lung.cheng@cern.ch
+##################################################################################################
+from typing import List, Optional, Union, Dict, Set, Tuple, Sequence
+
+import numpy as np
+
+import ROOT
+import cppyy
+
+from quickstats import semistaticmethod, AbstractObject
+from quickstats.utils.common_utils import combine_dict
+from quickstats.interface.cppyy import cpp_define
+
+class CachingNLLWrapper(AbstractObject):
+    """
+    Dedicated wrapper for the CachingSimNLL class from CMS
+    """
+
+    @property
+    def nll(self):
+        return self._nll
+
+    def __init__(self, nll:Optional["ROOT.RooAbsReal"]=None,
+                 verbosity:Optional[Union[int, str]]="INFO"):
+        super().__init__(verbosity=verbosity)
+        self.set_nll(nll)
+
+    @semistaticmethod
+    def is_defined_class(self):
+        return hasattr(cppyy.gbl, 'cacheutils::CachingSimNLL')
+
+    @semistaticmethod
+    def define_cast_function(self):
+        if not self.is_defined_class():
+            return None
+        expr = 'cacheutils::CachingSimNLL * castAsCachingSimNLL(RooAbsReal* nll){ return dynamic_cast<cacheutils::CachingSimNLL *>(nll);}'
+        status = cpp_define(expr, 'CachingNLLWrapperMethod')
+        return status
+
+    def set_nll(self, nll:Optional["ROOT.RooAbsReal"]=None):
+        if (nll is None) or (not self.is_defined_class()):
+            self._nll = None
+            return None
+        if not hasattr(cppyy.gbl, 'castAsCachingSimNLL'):
+            self.define_cast_function()
+        caching_nll = cppyy.gbl.castAsCachingSimNLL(nll)
+        if caching_nll:
+            self._nll = caching_nll
+        else:
+            self._nll = None
+
+    def set_zero_point(self):
+        """Offset the current NLL value to zero
+        """
+        if self.nll is None:
+            return None
+        self.stdout.debug(f'Setting zero point for the caching NLL: {self.nll.GetName()}')
+        self.nll.setZeroPoint()
+
+    def update_zero_point(self):
+        """Update offset value of the current NLL
+        """
+        if self.nll is None:
+            return None
+        self.stdout.debug(f'Updating zero point for the caching NLL: {self.nll.GetName()}')
+        self.nll.updateZeroPoint()
+
+    def clear_zero_point(self):
+        """Remove offset value of the current NLL
+        """
+        if self.nll is None:
+            return None
+        self.stdout.debug(f'Clearing zero point for the caching NLL: {self.nll.GetName()}')
+        self.nll.clearZeroPoint()
+
+    def set_hide_categories(self, value:bool=True):
+        if self.nll is None:
+            return None
+        self.nll.setHideRooCategories(value)
+
+    def set_mask_non_discrete_channels(self, value:bool=True):
+        if self.nll is None:
+            return None
+        self.nll.setMaskNonDiscreteChannels(value)
+
+    def set_hide_constants(self, value:bool=True):
+        if self.nll is None:
+            return None
+        self.nll.setHideConstants(value)
+
+    def set_mask_constraints(self, value:bool=True):
+        if self.nll is None:
+            return None
+        self.nll.setMaskConstraints(value)
\ No newline at end of file
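A short usage sketch for the wrapper above; it assumes the CMS cacheutils::CachingSimNLL class has been loaded (e.g. via compiled CMS macros), and otherwise every call degrades to the no-op branches coded above:

# Sketch only: `nll` would normally come from pdf.createNLL(data) on a CMS-style workspace.
from quickstats.components import CachingNLLWrapper

wrapper = CachingNLLWrapper(nll=None, verbosity="DEBUG")  # without a CachingSimNLL, methods are no-ops
wrapper.set_zero_point()     # offsets the wrapped NLL value to zero
wrapper.update_zero_point()  # refreshes the offset after parameter changes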
a/quickstats/components/discrete_nuisance.py b/quickstats/components/discrete_nuisance.py new file mode 100644 index 0000000000000000000000000000000000000000..26ea1bffa7a3f9895d311a6211e86b6eb7926532 --- /dev/null +++ b/quickstats/components/discrete_nuisance.py @@ -0,0 +1,232 @@ +################################################################################################## +# Based on https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit +# Author: Alkaid Cheng +# Email: chi.lung.cheng@cern.ch +################################################################################################## +from typing import List, Optional, Union, Dict, Set, Tuple, Sequence + +import numpy as np + +import ROOT + +import cppyy + +from quickstats import semistaticmethod, AbstractObject +from quickstats.utils.common_utils import combine_dict +from quickstats.interface.root import RooArgSet + +class DiscreteNuisance(AbstractObject): + + _DEFAULT_CONFIG_ = { + "disc_set_keyword": "discreteParams" + } + + @property + def multipdf_cats(self): + return self._multipdf_cats + + @property + def multipdf_params(self): + return self._multipdf_params + + @property + def all_params(self): + return self._all_params + + @property + def multipdfs(self): + return self._multipdfs + + @property + def freeze_flag(self): + return self._freeze_flag + + def __init__(self, ws:Optional["ROOT.RooWorkspace"]=None, + pdf:Optional["ROOT.RooAbsPdf"]=None, + config:Optional[Dict]=None, + verbosity:Optional[Union[int, str]]="INFO"): + super().__init__(verbosity=verbosity) + self.config = combine_dict(self._DEFAULT_CONFIG_, config) + self.initialize(ws, pdf) + + @semistaticmethod + def extract_discrete_variables(self, ws:"ROOT.RooWorkspace", pdf:"ROOT.RooAbsPdf", + keyword:Optional[str]=None): + all_cats = ws.allCats().Clone() + if pdf.InheritsFrom("RooSimultaneous"): + index_cat = pdf.indexCat() + if index_cat in all_cats: + all_cats.remove(index_cat) + if not all_cats: + return ROOT.RooArgList(), ROOT.RooArgList(), ROOT.RooArgSet() + if (keyword is not None) and ws.genobj(keyword): + disc_obj = ws.genobj(keyword) + # sanity check + if not isinstance(disc_obj, ROOT.RooArgSet): + raise RuntimeError(f'discrete parameter container "{keyword}" is not an instance of RooArgSet') + disc_cats = RooArgSet.select_by_class(disc_obj, 'RooCategory') + if len(disc_cats) != len(disc_obj): + raise RuntimeError(f'discrete parameter set "{keyword}" contains instance(s) other than RooCategory') + else: + disc_cats = all_cats + all_pdfs = ws.allPdfs() + disc_pdfs = RooArgSet.select_by_class(all_pdfs, 'RooMultiPdf') + if not disc_pdfs: + return ROOT.RooArgList(), ROOT.RooArgList(), ROOT.RooArgSet() + dpd_disc_cats = RooArgSet.select_dependent_parameters(disc_cats, pdf) + dpd_disc_pdfs = RooArgSet.select_dependent_parameters(disc_pdfs, pdf) + if len(dpd_disc_cats) != len(dpd_disc_pdfs): + raise RuntimeError('mismatch between number of discrete categories and number of multipdfs') + valid_disc_cats = ROOT.RooArgList() + valid_disc_pdfs = ROOT.RooArgList() + valid_disc_params = ROOT.RooArgSet() + for cat in dpd_disc_cats: + clients = ROOT.RooArgSet(*cat.clients()) + candidate_pdfs = clients.selectCommon(dpd_disc_pdfs) + if len(candidate_pdfs) == 1: + valid_disc_cats.add(cat) + valid_disc_pdfs.add(candidate_pdfs) + elif len(candidate_pdfs) == 0: + raise RuntimeError(f'failed to find multipdf associated with the category "{cat.GetName()}"') + else: + raise RuntimeError(f'found more than one multipdf associated with the category 
"{cat.GetName()}": ' + f'{", ".join([pdf.GetName() for pdf in candidate_pdfs])}') + disc_pdf = candidate_pdfs.first() + disc_params = disc_pdf.getParameters(0) + disc_params = RooArgSet.select_by_class(disc_params, 'RooRealVar') + ROOT.RooStats.RemoveConstantParameters(disc_params) + valid_disc_params.add(disc_params) + return valid_disc_cats, valid_disc_pdfs, valid_disc_params + + def initialize(self, ws:"ROOT.RooWorkspace", pdf:"ROOT.RooAbsPdf"): + if (ws is None) and (pdf is None): + self._multipdf_cats = ROOT.RooArgList() + self._multipdfs = ROOT.RooArgList() + self._multipdf_params = ROOT.RooArgSet() + self._all_params = ROOT.RooArgSet() + self._freeze_flag = False + return + disc_set_keyword = self.config['disc_set_keyword'] + disc_cats, disc_pdfs, disc_params = self.extract_discrete_variables(ws, pdf, disc_set_keyword) + if disc_cats: + n_disc_cats = len(disc_cats) + self.stdout.info(f'Found {n_disc_cats} discrete nuisances.') + else: + self.stdout.info('No discrete nuisances found.') + + self._multipdf_cats = disc_cats + self._multipdfs = disc_pdfs + self._multipdf_params = disc_params + self._all_params = ws.allVars() + self._freeze_flag = True + + self.print_active_pdf_summary() + + def has_discrete_nuisance(self): + return len(self.multipdfs) > 0 + + def print_active_pdf_summary(self): + if not self.has_discrete_nuisance(): + return + self.stdout.info('Summary of multipdfs and their corresponding active pdfs:') + for multipdf, multipdf_cat in zip(self.multipdfs, self.multipdf_cats): + multipdf_name = multipdf.GetName() + current_pdf = multipdf.getCurrentPdf() + current_pdf_name = current_pdf.GetName() + current_index = multipdf_cat.getCurrentIndex() + self.stdout.info(f' {multipdf_name} -> {current_pdf_name} (index = {current_index})', bare=True) + + def set_freeze_flag(self, flag:bool=True): + self._freeze_flag = flag + + def freeze_discrete_params(self, freeze:bool=True): + if (not self.has_discrete_nuisance()) or (not self.freeze_flag): + return None + multipdfs = self.multipdfs + multipdf_params = ROOT.RooArgSet(self.multipdf_params) + # For each multiPdf, get the active pdf and remove its parameters + # from this list of params and then freeze the remaining ones + for multipdf in multipdfs: + current_pdf = multipdf.getCurrentPdf() + pdf_params = current_pdf.getParameters(0) + ROOT.RooStats.RemoveConstantParameters(pdf_params) + multipdf_params.remove(pdf_params) + num_params = len(multipdf_params) + if freeze: + self.stdout.debug(f'Freezing {num_params} disassociated multipdf parameters.') + else: + self.stdout.debug(f'Unfreezing {num_params} disassociated multipdf parameters.') + if self.stdout.verbosity <= "DEBUG": + multipdf_params.Print("V") + RooArgSet.set_constant_state(multipdf_params, freeze) + + def get_default_pdf_indices(self): + return np.zeros(len(self.multipdf_cats)) + + def get_current_pdf_indices(self): + return np.array([cat.getCurrentIndex() for cat in self.multipdf_cats]) + + def get_pdf_sizes(self): + return np.array([len(cat) for cat in self.multipdf_cats]) + + def get_n_orthogonal_combination(self): + pdf_sizes = self.get_pdf_sizes() + return np.sum(pdf_sizes) - len(pdf_sizes) + 1 + + def get_n_combination(self, contributing_indices:Optional[List[np.ndarray]]=None): + if contributing_indices is None: + return np.prod(self.get_pdf_sizes()) + return np.prod([np.sum(indices, dtype=int) for indices in contributing_indices]) + + def get_orthogonal_combinations(self): + pdf_sizes = self.get_pdf_sizes() + n_pdf = len(pdf_sizes) + combinations = 
np.zeros((np.sum(pdf_sizes) - n_pdf + 1, n_pdf), dtype=np.int32) + start_idx = 1 + for i, size in enumerate(pdf_sizes): + combinations[start_idx:start_idx + size - 1, i] = np.arange(1, size, dtype=np.int32) + start_idx += (size - 1) + return combinations + + def get_total_combinations(self, contributing_indices:Optional[List[np.ndarray]]=None): + pdf_sizes = self.get_pdf_sizes() + if contributing_indices is None: + grid_points = [np.arange(size, dtype=np.int32) for size in pdf_sizes] + else: + grid_points = [np.arange(size, dtype=np.int32)[np.array(contributing_indices[i], dtype=bool)] for i, size in enumerate(pdf_sizes)] + combinations = np.array(np.meshgrid(*grid_points)).T.reshape(-1, len(pdf_sizes)) + return combinations + + def reorder_combinations(self, combinations:np.ndarray, reference_indices:np.ndarray): + pdf_sizes = self.get_pdf_sizes() + return (combinations + reference_indices) % pdf_sizes + + def create_contributing_indices(self): + return [np.ones(size) for size in self.get_pdf_sizes()] + + def filter_combinations(self, combinations, contributing_indices:Optional[List[np.ndarray]]=None): + if contributing_indices is None: + return combinations + if np.all(contributing_indices == 1): + return combinations + pdf_sizes = self.get_pdf_sizes() + n_pdf = len(pdf_sizes) + max_size = np.max(pdf_sizes) + regular_array = np.zeros((len(pdf_sizes), max_size)) + for i in range(n_pdf): + regular_array[i, 0 : pdf_sizes[i]] = contributing_indices[i] + valid_idx = np.where(np.choose(combinations, regular_array.T).sum(axis=1) == n_pdf) + return combinations[valid_idx] + + def float_all_cats(self): + RooArgSet.set_constant_state(self.multipdf_cats, False) + + def fix_non_target_cats(self, target_index:int): + if target_index < 0: + RooArgSet.set_constant_state(self.multipdf_cats, False) + else: + RooArgSet.set_constant_state(self.multipdf_cats, True) + self.multipdf_cats.at(target_index).setConstant(False) + + def set_category_indices(self, indices:np.ndarray): + return RooArgSet.set_category_indices(self.multipdf_cats, indices) \ No newline at end of file diff --git a/quickstats/components/extended_minimizer.py b/quickstats/components/extended_minimizer.py index 1ffbad2b81b83688f691f7eac5e262e74eed1e62..066b65277b6ad9df6b1a89489ac8ddadd62a2603 100644 --- a/quickstats/components/extended_minimizer.py +++ b/quickstats/components/extended_minimizer.py @@ -9,8 +9,16 @@ import math import numpy as np import ROOT +import cppyy + import quickstats -from quickstats import AbstractObject +from quickstats import AbstractObject, Timer +from quickstats.utils.string_utils import split_str +from quickstats.utils.common_utils import combine_dict +from quickstats.interface.root import RooArgSet +from quickstats.interface.cppyy.vectorize import as_vector +from .discrete_nuisance import DiscreteNuisance +from .caching_nll_wrapper import CachingNLLWrapper class ExtendedMinimizer(AbstractObject): _SCAN_EXCLUDE_CONFIG_ = ['scan', 'minos', 'save'] @@ -41,7 +49,24 @@ class ExtendedMinimizer(AbstractObject): 'n_sigma': 1, 'precision': 0.001, 'strategy': 1, - 'print_level': -1 + 'print_level': -1, + 'error_level': -1, + # extra configs + 'check_boundary': 1, + 'prefit_hesse': 0, + 'postfit_hesse': 0, + 'minuit2_storage_level': 0, + # cms dedicated configs + 'set_zero_point': 1, # change the reference point of the NLL to be zero during minimization + # discrete nuisance configs + 'discrete_min_tol': 0.001, + 'do_discrete_iteration': 1, + 'freeze_disassociated_params': 1, + 'do_short_combination': 1, + 
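# editor's note (review comment, not from the original patch): the discrete-nuisance
+        # options in this block follow the CMS CascadeMinimizer behaviour: discrete_min_tol
+        # is the NLL-improvement threshold that stops the discrete iterations, and
+        # do_short_combination selects the cheaper iterative index scan over the full scan
+        # of all pdf index combinations (see discrete_minimize below).
+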
'max_short_combination_iteration': 15, + 'multimin_hide_constants': 1, + 'multimin_mask_constraints': 1, + 'multimin_mask_channels': 2 } _DEFAULT_NLL_OPTION_ = { @@ -50,13 +75,35 @@ class ExtendedMinimizer(AbstractObject): 'batch_mode': False, 'int_bin_precision': -1 } + + _DEFAULT_RUNTIMEDEF_ = { + "OPTIMIZE_BOUNDS": 1, + "ADDNLL_RECURSIVE": 1, + "ADDNLL_GAUSSNLL": 1, + "ADDNLL_HISTNLL": 1, + "ADDNLL_CBNLL": 1, + "TMCSO_AdaptivePseudoAsimov": 1, + "ADDNLL_ROOREALSUM_FACTOR": 1, + "ADDNLL_ROOREALSUM_NONORM": 1, + "ADDNLL_ROOREALSUM_BASICINT": 1, + "ADDNLL_ROOREALSUM_KEEPZEROS": 1, + "ADDNLL_PRODNLL": 1, + "ADDNLL_HFNLL": 1, + "ADDNLL_HISTFUNCNLL": 1, + "ADDNLL_ROOREALSUM_CHEAPPROD": 1 + } @property def config(self): return self._config - def __init__(self, minimizer_name:str, pdf:"ROOT.RooAbsPdf", data:"ROOT.RooAbsData", + def __init__(self, minimizer_name:str="Minimizer", + pdf:Optional["ROOT.RooAbsPdf"]=None, + data:Optional["ROOT.RooAbsData"]=None, + poi:Optional["ROOT.RooArgSet"]=None, minimizer_options:Optional[Dict]=None, + runtimedef_expr:Optional[str]=None, + discrete_nuisance:Optional[DiscreteNuisance]=None, verbosity:Optional[Union[int, str]]="INFO", **kwargs): super().__init__(verbosity=verbosity) @@ -91,6 +138,16 @@ class ExtendedMinimizer(AbstractObject): self.nll_commands = {} self.configure_default_minimizer_options() + self.set_pdf(pdf) + self.set_data(data) + self.set_poi(poi) + + # CMS specific implementations + self.set_discrete_nuisance(discrete_nuisance) + self.caching_nll = CachingNLLWrapper(verbosity=self.stdout.verbosity) + self.set_pois_for_auto_bounds() + self.set_pois_for_max_bounds() + self.set_runtimedef(runtimedef_expr) self.stdout.info(f'Created ExtendedMinimizer("{self.name}") instance') @@ -103,6 +160,7 @@ class ExtendedMinimizer(AbstractObject): assert isinstance(val, ROOT.RooAbsData) self._data = val self.nll = None + self.minimizer = None @property def pdf(self): @@ -111,12 +169,30 @@ class ExtendedMinimizer(AbstractObject): @pdf.setter def pdf(self, val): assert isinstance(val, ROOT.RooAbsPdf) - self._pdf = val + self._pdf = val + self.nll = None + self.minimizer = None + + @property + def discrete_nuisance(self): + return self._discrete_nuisance + + @property + def poi(self): + return self._poi + + @property + def auto_bounds_pois(self): + return self._auto_bounds_pois + + @property + def max_bounds_pois(self): + return self._max_bounds_pois @staticmethod def _configure_default_minimizer_options(minimizer_type='Minuit2', minimizer_algo='Migrad', strategy=0, print_level=-1, - debug_mode=False): + minuit2_storage_level=0, debug_mode=False): ROOT.Math.MinimizerOptions.SetDefaultMinimizer(minimizer_type, minimizer_algo) ROOT.Math.MinimizerOptions.SetDefaultStrategy(strategy) ROOT.Math.MinimizerOptions.SetDefaultPrintLevel(print_level) @@ -127,17 +203,68 @@ class ExtendedMinimizer(AbstractObject): ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.FATAL) if ROOT.Math.MinimizerOptions.DefaultPrintLevel() < 0: ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.FATAL) + if minimizer_type == 'Minuit2': + options = ROOT.Math.MinimizerOptions.Default('Minuit2') + options.SetValue('StorageLevel', minuit2_storage_level) return None def configure_default_minimizer_options(self): self._configure_default_minimizer_options(self.config['minimizer_type'], self.config['minimizer_algo'], self.config['strategy'], - self.config['print_level']) + self.config['print_level'], + self.config['minuit2_storage_level']) return None + def 
set_discrete_nuisance(self, discrete_nuisance:Optional[DiscreteNuisance]=None): + if discrete_nuisance is None: + self._discrete_nuisance = DiscreteNuisance() + else: + self._discrete_nuisance = discrete_nuisance + + def has_discrete_nuisance(self): + return self.discrete_nuisance.has_discrete_nuisance() + + def set_runtimedef(self, expr:Optional[Union[str, Dict]]=None, ignore_if_not_applicable:bool=True): + if not hasattr(cppyy.gbl, 'runtimedef'): + if not ignore_if_not_applicable: + raise RuntimeError('can not set minimizer runtime defines: namespace "runtimedef" not defined') + return None + runtimedef_map = combine_dict(self._DEFAULT_RUNTIMEDEF_) + if expr is None: + pass + elif isinstance(expr, str): + tokens = split_str(expr, ',', remove_empty=True) + for token in tokens: + runtimedef = split_str(token, '=') + if (len(runtimedef) != 2) or (not runtimedef[1].isdigit()): + raise ValueError(f'invalid token in runtimedef expression: "{token}"') + name, value = runtimedef[0], int(runtimedef[1]) + runtimedef_map[name] = value + elif isinstance(expr, dict): + runtimedef_map = combine_dict(runtimedef_map, expr) + else: + raise ValueError(f'invalid runtimedef expression: "{expr}"') + for name, value in runtimedef_map.items(): + cppyy.gbl.runtimedef.set(name, value) + self.stdout.info(f'Set runtimedef "{name}" = {value}') + + def set_pdf(self, pdf:"ROOT.RooAbsPdf"): + self.pdf = pdf + def set_data(self, data:"ROOT.RooAbsData"): self.data = data + + def set_poi(self, pois:Optional[Union["ROOT.RooRealVar", "ROOT.RooArgSet"]]=None): + if pois is None: + pois = ROOT.RooArgSet() + self._poi = pois + + def set_pois_for_auto_bounds(self, pois:Optional["ROOT.RooArgSet"]=None): + self._auto_bounds_pois = pois + + def set_pois_for_max_bounds(self, pois:Optional["ROOT.RooArgSet"]=None): + self._max_bounds_pois = pois def _bug_fix_create_nll(self, dataset:"ROOT.RooDataSet", nll_commands:List["ROOT.RooCmdArg"]): # fix range bug @@ -164,6 +291,8 @@ class ExtendedMinimizer(AbstractObject): def _create_nll(self, nll_commands:List["ROOT.RooCmdArg"]=None, dataset:Optional["ROOT.RooDataSet"]=None): + if self.pdf is None: + raise RuntimeError('pdf not initialized') if nll_commands is None: nll_commands = list(self.nll_commands.values()) #self.nll = self.pdf.createNLL(self.data, nll_command_list) @@ -172,6 +301,8 @@ class ExtendedMinimizer(AbstractObject): range_name = command.getString(0) self.stdout.info(f"Using the range \"{range_name}\" for NLL calculation") if dataset is None: + if not self.data: + raise RuntimeError('dataset not initialized') dataset = self.data if quickstats.root_version >= (6, 26, 0): self._bug_fix_create_nll(dataset, nll_commands) @@ -219,11 +350,16 @@ class ExtendedMinimizer(AbstractObject): if not self.nll: raise RuntimeError('NLL not initialized') + self.caching_nll.set_nll(self.nll) + self.caching_nll.set_hide_categories(True) + if (self.minimizer is None) or (not self.config['reuse_minimizer']): self.configure_default_minimizer_options() self.minimizer = ROOT.RooMinimizer(self.nll) self.minimizer.setOffsetting(self.config['minimizer_offset']) self.minimizer.setPrintLevel(self.config['print_level']) + if self.config['error_level'] >= 0: + self.minimizer.setErrorLevel(self.config['error_level']) self.minimizer.optimizeConst(self.config['optimize']) self.minimizer.setMinimizerType(self.config['minimizer_type']) self.minimizer.setEvalErrorWall(self.config['do_ee_wall']) @@ -241,6 +377,8 @@ class ExtendedMinimizer(AbstractObject): if self.config['max_iters'] != -1: 
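# editor's note (review comment): max_iters = -1 is a sentinel that keeps ROOT's
# default iteration limit; any other value overrides it on the line below.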
self.minimizer.setMaxIterations(self.config['max_iters']) + + self.caching_nll.set_hide_categories(False) def construct_nll_commands(self, constrain=None, global_observables=None, conditional_observables=None, **kwargs): @@ -267,11 +405,6 @@ class ExtendedMinimizer(AbstractObject): command_dict = {} for command in commands: command_dict[command.GetName()] = command - # temporary fix for ROOT version below 6.22/06 - #nll_command_list = ROOT.RooLinkedList() - #for command in commands: - # nll_command_list.Add(command) - #return nll_command_list return command_dict def set_nll_commands(self, nll_commands:List["ROOT.RooCmdArg"]): @@ -317,128 +450,177 @@ class ExtendedMinimizer(AbstractObject): self.fit_options = kwargs self.scan_options = {k:v for k,v in kwargs.items() if k not in self._SCAN_EXCLUDE_CONFIG_} - - def minimize(self, nll=None, **kwargs): + + def expand_poi_bounds(self, threshold:float=0.1): + if (not self.auto_bounds_pois) and (not self.max_bounds_pois): + return False + expanded_bounds = False + for bound_type, pois in [('max', self.max_bounds_pois), + ('both', self.auto_bounds_pois)]: + if not pois: + continue + orig_pois_at_min = ROOT.RooArgSet() + new_pois_at_min = ROOT.RooArgSet() + orig_pois_at_max = ROOT.RooArgSet() + new_pois_at_max = ROOT.RooArgSet() + if bound_type == "max": + RooArgSet.expand_parameters_range(pois, threshold, True, False, + orig_pois_at_min, + new_pois_at_min, + orig_pois_at_max, + new_pois_at_max) + elif bound_type == 'both': + RooArgSet.expand_parameters_range(pois, threshold, True, True, + orig_pois_at_min, + new_pois_at_min, + orig_pois_at_max, + new_pois_at_max) + else: + raise ValueError(f'unknown bound type: {bound_type}') + threshold_pc = threshold * 100 + for orig_poi, new_poi in zip(orig_pois_at_min, new_pois_at_min): + expanded_bounds = True + self.stdout.info(f'Parameter {orig_poi.GetName()} has value {orig_poi.getVal()} which is within ' + f'{threshold_pc}% from the low boundary {orig_poi.getMin()}. Will enlarge range ' + f'to [{new_poi.getMin()}, {new_poi.getMax()}].') + for orig_poi, new_poi in zip(orig_pois_at_max, new_pois_at_max): + expanded_bounds = True + self.stdout.info(f'Parameter {orig_poi.GetName()} has value {orig_poi.getVal()} which is within ' + f'{threshold_pc}% from the max boundary {orig_poi.getMax()}. Will enlarge range ' + f'to [{new_poi.getMin()}, {new_poi.getMax()}].') + return expanded_bounds + + def check_param_boundary(self): + if not self.nll: + self.stdout.warning('Failed to check boundary values: nll not set') + return None + nll_params = self.nll.getParameters(self.data.get()) + nll_params.remove(self.poi) + ROOT.RooStats.RemoveConstantParameters(nll_params) + nll_params = RooArgSet.select_by_class(nll_params, 'RooRealVar') + boundary_params = RooArgSet.get_boundary_parameters(nll_params) + if boundary_params: + self.stdout.warning('Found parameters near boundary (within 1 sigma) after fit.') + for param in boundary_params: + self.stdout.warning(f' Parameter = {param.GetName()}, Value = {param.getVal()}, ' + f'RangeLo = {param.getMin()}, RangeHi = {param.getMax()}, ' + f'ErrorLo = {param.getErrorLo()}, ErrorHi = {param.getErrorHi()}', + bare=True) + + def save_fit_result(self, named:bool=False): + if not self.minimizer: + self.stdout.warning('Failed to save fit result: minimizer not set') + return None + if named: + data_name = self.data.GetName() + save_name = f"fitresult_{self.name}_{data_name}" + save_title = f"Result of fit of p.d.f. 
{self.name} to dataset {data_name}" + self.stdout.info(f'ExtendedMinimizer::minimize("{self.name}") saving results as {save_name}') + self.fit_result = self.minimizer.save(save_name, save_title) + else: + self.fit_result = self.minimizer.save() + + def minimize(self, nll=None, cascade:bool=True, **kwargs): self.configure(**kwargs) if nll is None: self.create_nll() else: self.nll = nll - - if self.cond_set.size() > 0: - attached_set = self.nll.getVariables() - for cond in self.cond_set: - buffer = attached_set.find(cond.GetName()) - if buffer: - buffer.setVal(cond.getVal()) - buffer.setConstant(1) + self.min_nll = None + self.discrete_nuisance.set_freeze_flag(self.config['freeze_disassociated_params']) + + if self.cond_set: + nll_variables = self.nll.getVariables() + selected_variables = nll_variables.selectCommon(self.cond_set) + selected_variables.assignValueOnly(self.cond_set) + RooArgSet.set_constant_state(selected_variables, True) self.create_minimizer() + status = 0 - attached_set = self.nll.getVariables() - perform_minimization = any(not attached.isConstant() for attached in attached_set) - + nll_variables = self.nll.getVariables() + perform_minimization = RooArgSet.select_by_constant_state(nll_variables, False) if not perform_minimization: - self.stdout.info('ExtendedMinimizer::minimize("{}") no floating parameters found' - '-- skipping minimization'.format(self.name)) + self.stdout.info('ExtendedMinimizer::minimize("{}") no floating parameters found ' + '-- skipping minimization'.format(self.name)) + self.min_nll = self.nll.getVal() else: - status = self.robust_minimize() + self.discrete_nuisance.freeze_discrete_params(True) - # Evaluate errors with improve - if self.config['improve']: - self.minimizer.improve() - - # Evaluate errors with Hesse - if self.config['hesse']: - self.minimizer.hesse() - - # Obtain Hessian matrix either from patched Minuit or after inversion - # TMatrixDSym G = Minuit2::MnHesse::lastHessian(); - last_fit = self.minimizer.lastMinuitFit() - if last_fit: - self.Hessian_matrix = last_fit.covarianceMatrix().Invert() - else: - self.Hessian_matrix = None - - # Eigenvalue and eigenvector analysis - if self.config['eigen']: - self.eigen_analysis() - - # Evaluate errors with Minos - if self.config['minos']: - if self.minos_set.size() > 0: - self.minimizer.minos(self.minos_set) + # actual minimization done here + if (self.has_discrete_nuisance() and self.config['do_discrete_iteration']): + status = self.discrete_minimize(cascade=cascade) else: - self.minimizer.minos() + status = self.robust_minimize(cascade=cascade) + + # Evaluate errors with improve + if self.config['improve']: + self.minimizer.improve() + + # Evaluate errors with Hesse + if self.config['hesse']: + self.minimizer.hesse() + + # Eigenvalue and eigenvector analysis + if self.config['eigen']: + self.eigen_analysis() + + # Evaluate errors with Minos + if self.config['minos']: + if self.minos_set.size() > 0: + self.minimizer.minos(self.minos_set) + else: + self.minimizer.minos() + + self.discrete_nuisance.freeze_discrete_params(False) + + # post-processing + if self.config['check_boundary']: + self.check_param_boundary() - self.min_nll = self.nll.getVal() + self.min_nll = self.nll.getVal() - if self.config['scan']: - self.find_sigma() + if self.config['scan']: + self.find_sigma() - if self.config['save']: - data_name = self.data.GetName() - save_name = "fitresult_{}_{}".format(self.name, data_name) - save_title = "Result of fit of p.d.f. 
{} to dataset {}".format(self.name, data_name) - self.stdout.info('ExtendedMinimizer::minimize("{}") saving results as {}'.format(self.name, save_name)) - self.fit_result = self.minimizer.save(save_name, save_title) - # if the 'scan' option is used, minimizer will get deleted before this line - elif self.minimizer is not None: - self.fit_result = self.minimizer.save() - - if self.cond_set.size() > 0: - attached_set = self.nll.getVariables() - for cond in self.cond_set: - buffer = attached_set.find(cond.GetName()) - if buffer: - buffer.setVal(cond.getVal()) - buffer.setConstant(cond.isConstant()) + # if the 'scan' option is used, minimizer will get deleted before this line + if self.minimizer is not None: + self.save_fit_result(named=self.config['save']) + + if self.cond_set: + nll_variables = self.nll.getVariables() + selected_variables = nll_variables.selectCommon(self.cond_set) + selected_variables.assign(self.cond_set) + # dispose of minimizer if not self.config['reuse_minimizer']: self.minimizer = None self.status = status return status - - def eigen_analysis(self): - if not isinstance(self.Hessian_matrix, ROOT.TMatrixDSym): - raise ValueError('invalid Hessian matrix') - n = self.Hessian_matrix.GetNrows() - - # construct reduced Hessian matrix - Gred = ROOT.TMatrixDSym(n) - for i in range(n): - for j in range(n): - norm = math.sqrt(self.Hessian_matrix(i, i)*self.Hessian_matrix(j, j)) - Gred[i][j] = self.Hessian_matrix(i, j)/norm - - # perform eigenvalue analysis using ROOT standard tools - Geigen = ROOT.TMatrixDSymEigen(Gred) - - self.eigen_values = Geigen.GetEigenValues() - self.eigen_vectors = Geigen.GetEigenVectors() - - # simple printing of eigenvalues and eigenvectors - self.eigen_values.Print() - self.eigen_vectors.Print() - - def robust_minimize(self): + def robust_minimize(self, cascade:bool=True): + self.stdout.debug('Begin robust minimization.') strategy = self.config['strategy'] retry = self.config['retry'] minimizer_type = self.config['minimizer_type'] minimizer_algo = self.config['minimizer_algo'] - status = self.minimizer.minimize(minimizer_type, minimizer_algo) - while ((status != 0) and (status!=1) and (retry>0)): - if (strategy < 2): + status = self.single_minimize(minimizer_type, minimizer_algo) + # repeat if fit failed or poi(s) at boundary + expanded_pois = self.expand_poi_bounds() + def require_cascade(fit_status:int): + return cascade and (fit_status not in [0, 1]) + while ((require_cascade(status) or expanded_pois) and (retry > 0)): + if (not expanded_pois) and (strategy < 2): strategy += 1 retry -= 1 self.stdout.error(f'ExtendedMinimizer::robust_minimize("{self.name}") fit failed with status {status}. 
' f'Retrying with strategy {strategy}') self.minimizer.setStrategy(strategy) - status = self.minimizer.minimize(minimizer_type, minimizer_algo) + status = self.single_minimize(minimizer_type, minimizer_algo) + expanded_pois = self.expand_poi_bounds() if status not in [0, 1]: self.stdout.error(f'ExtendedMinimizer::robust_minimize("{self.name}") fit failed with status {status}') @@ -446,7 +628,237 @@ class ExtendedMinimizer(AbstractObject): self.minimizer.setStrategy(self.config['strategy']) return status + + def single_minimize(self, minimizer_type:str, minimizer_algo:str): + self.stdout.debug('Begin single minimization.') + if self.minimizer is None: + self.create_minimizer() + self.discrete_nuisance.freeze_discrete_params(True) + if self.config['set_zero_point']: + self.caching_nll.set_nll(self.nll) + else: + self.caching_nll.set_nll() + self.caching_nll.set_zero_point() + if self.config['prefit_hesse']: + self.minimizer.hesse() + self.caching_nll.update_zero_point() + #nll_params = self.nll.getParameters(0) + #RooArgSet.save_data_as_txt(nll_params, 'minimizer_prefit_result.txt', 8) + status = self.minimizer.minimize(minimizer_type, minimizer_algo) + #RooArgSet.save_data_as_txt(nll_params, 'minimizer_postfit_result.txt', 8) + if self.config['postfit_hesse']: + self.caching_nll.update_zero_point() + self.minimizer.hesse() + self.caching_nll.clear_zero_point() + self.discrete_nuisance.freeze_discrete_params(False) + return status + + def discrete_minimize(self, cascade:bool=True): + """Minimization involving discrete nuisances + """ + status = 0 + + do_short_combination = self.config['do_short_combination'] + if do_short_combination: + self.stdout.info('Begin discrete minimization with short combinations.') + with Timer() as t: + status = self.robust_minimize(cascade=cascade) + self.min_nll = self.nll.getVal() + prev_nll = self.min_nll + max_iteration = self.config['max_short_combination_iteration'] + min_tol = self.config['discrete_min_tol'] + for i in range(max_iteration): + self.stdout.info(f'Current iteration: {i + 1}') + status = self.iterative_minimize(cascade=cascade) + delta_nll = abs(prev_nll - self.min_nll) + self.stdout.info(f'Previous NLL = {prev_nll}, Minimum NLL = {self.min_nll}, Delta NLL = {delta_nll}') + if delta_nll < min_tol: + self.stdout.info(f'Delta NLL is within the required tolerance of {min_tol}.') + break + prev_nll = self.min_nll + num_iteration = i + 1 + self.stdout.info(f'Discrete minimization finished in {num_iteration} iterations. Total time taken: {t.interval:.3f} s') + else: + self.stdout.info('Begin discrete minimization with full combinations.') + with Timer() as t: + clean_snapshot = ROOT.RooArgSet() + nll_params = self.nll.getParameters(0) + nll_params.remove(self.discrete_nuisance.multipdf_cats) + nll_params.snapshot(clean_snapshot) + self.min_nll = 10 + self.nll.getVal() + self.multiple_minimize(mode=0, clean_snapshot=clean_snapshot, cascade=cascade) + if len(self.discrete_nuisance.multipdfs) > 1: + self.multiple_minimize(mode=1, clean_snapshot=clean_snapshot, cascade=cascade) + self.multiple_minimize(mode=2, clean_snapshot=clean_snapshot, cascade=cascade) + if self.discrete_nuisance.freeze_flag: + self.discrete_nuisance.freeze_discrete_params(True) + status = self.robust_minimize(cascade=cascade) + self.discrete_nuisance.freeze_discrete_params(False) + self.stdout.info(f'Discrete minimization finished. 
Total time taken: {t.interval:.3f} s') + final_combination = self.discrete_nuisance.get_current_pdf_indices() + self.stdout.info(f'Final index combination: {list(final_combination)}') + return status + + # taken from https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/blob/main/src/CascadeMinimizer.cc + def iterative_minimize(self, cascade:bool=True): + with Timer() as t: + min_tol = self.config['discrete_min_tol'] + if self.min_nll is None: + self.min_nll = self.nll.getVal() + if abs(self.min_nll - self.nll.getVal()) > min_tol: + self.robust_minimize(cascade=cascade) + + self.discrete_nuisance.freeze_discrete_params(True) + + params_to_freeze = ROOT.RooArgSet(self.discrete_nuisance.all_params) + params_to_freeze.remove(self.discrete_nuisance.multipdf_params) + params_to_freeze.add(self.poi) + ROOT.RooStats.RemoveConstantParameters(params_to_freeze) + RooArgSet.set_constant_state(params_to_freeze, True) + nll_params = self.nll.getParameters(0) + nll_params.remove(self.discrete_nuisance.multipdf_cats) + ROOT.RooStats.RemoveConstantParameters(nll_params) + clean_snapshot = nll_params.snapshot() + # now cycle and fit + status = 0 + # start from the simplest scan; this is the full scan if do_short_combination is off + self.multiple_minimize(clean_snapshot=clean_snapshot, cascade=cascade, mode=0) + RooArgSet.set_constant_state(params_to_freeze, False) + # run one last fully floating fit to maintain RooFitResult + status = self.robust_minimize(cascade=cascade) + self.discrete_nuisance.freeze_discrete_params(False) + self.stdout.info(f'Finished iteration. Total time taken: {t.interval:.3f} s.') + return status + + # taken from https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/blob/main/src/CascadeMinimizer.cc + def multiple_minimize(self, clean_snapshot:"ROOT.RooArgSet", cascade:bool=True, mode:int=0, + contributing_indices:Optional[List[np.ndarray]]=None): + if not self.has_discrete_nuisance(): + raise RuntimeError('multiple minimize should only be used when discrete nuisances are available') + hide_constants = self.config['multimin_hide_constants'] + mask_constraints = self.config['multimin_mask_constraints'] + mask_channels = self.config['multimin_mask_channels'] + mask_channels_ex = (mask_channels == 2) + self.caching_nll.set_nll(self.nll) + + strategy_tmp = self.config['strategy'] + self.config['strategy'] = 0 + + new_discrete_minimum = False + multipdf_cats = self.discrete_nuisance.multipdf_cats + num_cats = len(multipdf_cats) + pdf_sizes = self.discrete_nuisance.get_pdf_sizes() + best_indices = self.discrete_nuisance.get_current_pdf_indices() + self.stdout.info(f'Current index combination: {list(best_indices)}') + self.stdout.info(f'Current NLL: {self.nll.getVal()}') + + if (mode == 0) or (contributing_indices is None): + contributing_indices = self.discrete_nuisance.create_contributing_indices() + + # keep hold of best fitted parameters + nll_params = self.nll.getParameters(0) + nll_params.remove(self.discrete_nuisance.multipdf_cats) + nll_params_snapshot = nll_params.snapshot() + + if mask_channels: + self.caching_nll.set_mask_non_discrete_channels(True) + if hide_constants: + self.caching_nll.set_hide_constants(True) + if mask_constraints: + self.caching_nll.set_mask_constraints(True) + self.minimizer = None + + if (mode in [0, 1]): + n_combination = self.discrete_nuisance.get_n_orthogonal_combination() + self.stdout.info(f'Generating {n_combination} orthogonal index combinations.') + combinations = self.discrete_nuisance.get_orthogonal_combinations() + else: + 
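# editor's note (review comment): modes 0 and 1 above use the cheap orthogonal
+                # scan where only one category index is moved away from the best-fit
+                # combination at a time; the branch below (mode 2) scans the full set of
+                # index combinations restricted to the indices still marked as contributing.
+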
n_combination = self.discrete_nuisance.get_n_combination(contributing_indices) + self.stdout.info(f'Generating {n_combination} index combinations.') + combinations = self.discrete_nuisance.get_total_combinations(contributing_indices) + + # reorder combinations starting from indices closest to the best indices + combinations = self.discrete_nuisance.reorder_combinations(combinations, best_indices) + + # filter combinations that are not contributing + combinations = self.discrete_nuisance.filter_combinations(combinations, contributing_indices) + + self.stdout.info(f'Total number of combinations after filtering contributing indices: {len(combinations)}') + + new_discrete_minimum = False + # skip the best fit case if already done + i_start = 1 if (mode != 0) else 0 + self.stdout.info(f'Begin fast loop minimization of {len(combinations)} index combinations.') + with Timer() as t: + fit_counter = 0 + max_deviation = 5 + multipdf_cats = self.discrete_nuisance.multipdf_cats + # the overhead in this for loop should be way less than the fit time + for combination in combinations[i_start:]: + changed_index = self.discrete_nuisance.set_category_indices(combination) + if fit_counter > 0: + nll_params.assignValueOnly(clean_snapshot) + if mask_channels_ex: + self.discrete_nuisance.fix_non_target_cats(changed_index) + self.caching_nll.set_mask_non_discrete_channels(True) + self.discrete_nuisance.freeze_discrete_params(True) + status = self.robust_minimize(cascade=cascade) + if mask_channels_ex: + self.discrete_nuisance.float_all_cats() + self.caching_nll.set_mask_non_discrete_channels(False) + + self.discrete_nuisance.freeze_discrete_params(False) + fit_counter += 1 + current_nll = self.nll.getVal() + delta_nll = current_nll - self.min_nll + self.stdout.debug(f'Index combination: {list(combination)}, NLL = {current_nll}, delta NLL = {delta_nll}') + # found new minimum + if (delta_nll < 0): + self.min_nll = current_nll + nll_params_snapshot.assignValueOnly(nll_params) + # set the best indices again + if not np.array_equal(best_indices, combination): + self.stdout.info(f'Found a better minimum at {list(combination)}. ' + f'New NLL = {current_nll}, delta NLL = {delta_nll}.') + new_discrete_minimum = True + best_indices = combination.copy() + # discard pdf that gives large nll + if (mode == 1): + if (delta_nll > max_deviation): + index_diff = np.where(best_indices != combination)[0] + diff_count = index_diff.shape[0] + if diff_count == 1: + index_diff = index_diff[0] + cat = self.discrete_nuisance.multipdf_cats.at(index_diff) + cat_index = cat.getIndex() + if (cat_index != best_indices[index_diff]): + contributing_indices[index_diff][cat_index] = 0 + pdf_name = self.discrete_nuisance.multipdfs.at(index_diff).GetName() + self.stdout.info(f'Found pdf index that gives large nll. Discarding pdf index ' + f'{cat_index} from the multipdf "{pdf_name}"') + # assign best indices + self.discrete_nuisance.set_category_indices(best_indices) + nll_params.assignValueOnly(nll_params_snapshot) + n_combination_run = len(combinations) - i_start + time_per_combination = (t.interval / n_combination_run) + + self.stdout.info(f'Done {n_combination_run} combinations in {t.interval:.3f} s ' + f'({time_per_combination:.4f} s per combination). New discrete minimum? 
{new_discrete_minimum}') + + self.config['strategy'] = strategy_tmp + + if mask_channels: + self.caching_nll.set_mask_non_discrete_channels(False) + + if hide_constants: + self.caching_nll.set_hide_constants(False) + if mask_constraints: + self.caching_nll.set_mask_constraints(False) + self.minimizer = None + + return new_discrete_minimum + def use_limits(self, par:"ROOT.RooRealVar", val:float): if (val < par.getMin()): self.stdout.warning(f'ExtendedMinimizer::use_limits("{self.name}") {par.GetName()} = {val} ' @@ -761,3 +1173,54 @@ class ExtendedMinimizer(AbstractObject): g_interpolated.SetMarkerStyle(20) return (g, g_interpolated) + + def eigen_analysis(self): + if not self.minimizer: + self.stdout.warning('Failed to get Hessian matrix: minimizer not set') + return None + # Obtain Hessian matrix either from patched Minuit or after inversion + # TMatrixDSym G = Minuit2::MnHesse::lastHessian(); + last_fit = self.minimizer.lastMinuitFit() + if not last_fit: + self.stdout.warning('Failed to get Hessian matrix: no fit performed') + return None + + self.Hessian_matrix = last_fit.covarianceMatrix().Invert() + + if not isinstance(self.Hessian_matrix, ROOT.TMatrixDSym): + raise ValueError('invalid Hessian matrix') + n = self.Hessian_matrix.GetNrows() + + # construct reduced Hessian matrix + Gred = ROOT.TMatrixDSym(n) + for i in range(n): + for j in range(n): + norm = math.sqrt(self.Hessian_matrix(i, i)*self.Hessian_matrix(j, j)) + Gred[i][j] = self.Hessian_matrix(i, j)/norm + + # perform eigenvalue analysis using ROOT standard tools + Geigen = ROOT.TMatrixDSymEigen(Gred) + + self.eigen_values = Geigen.GetEigenValues() + self.eigen_vectors = Geigen.GetEigenVectors() + + # simple printing of eigenvalues and eigenvectors + self.eigen_values.Print() + self.eigen_vectors.Print() + + def get_floating_nll_params(self): + if not self.nll: + raise RuntimeError('NLL not initialized') + nll_params = self.nll.getParameters(0) + ROOT.RooStats.RemoveConstantParameters(nll_params) + return nll_params + + def print_floating_nll_params(self): + nll_params = self.get_floating_nll_params() + for param in nll_params: + if param.InheritsFrom('RooRealVar'): + self.stdout.info(f'{param.GetName()} {param.getVal()}') + elif param.InheritsFrom('RooCategory'): + self.stdout.info(f'{param.GetName()} {param.currentIndex()}') + else: + self.stdout.info(f'{param.GetName()}') \ No newline at end of file diff --git a/quickstats/components/extended_model.py b/quickstats/components/extended_model.py index 266acc26d2fb4872f4470f36cb739497c3dbb2ed..ffadcb4aae997260d1edbeaeb692e688df72054a 100644 --- a/quickstats/components/extended_model.py +++ b/quickstats/components/extended_model.py @@ -22,7 +22,7 @@ from quickstats import semistaticmethod, AbstractObject from quickstats.core.io import switch_verbosity from quickstats.maths.numerics import is_integer, pretty_value, get_bins_given_edges, array_issubset, get_rmin_rmax from quickstats.parsers import RooParamSetupParser -from quickstats.utils.root_utils import load_macro +from quickstats.utils.root_utils import load_macro, close_read_cache from quickstats.utils.common_utils import str_list_filter, combine_dict, filter_by_wildcards from quickstats.utils import roofit_utils from quickstats.utils.string_utils import split_str, remove_whitespace @@ -31,6 +31,7 @@ from quickstats.interface.root import roofit_extension as rf_ext from quickstats.interface.root.roofit_extension import get_str_data from quickstats.components.basics import WSArgument, SetValueMode, ConstraintType from 
.extended_minimizer import ExtendedMinimizer +from .discrete_nuisance import DiscreteNuisance class ExtendedModel(AbstractObject): @@ -54,6 +55,20 @@ class ExtendedModel(AbstractObject): 'range_sideband_high': 'SBHi' } + _DEFAULT_CONFIGS_ = { + 'filename': None, + 'ws_name': None, + 'mc_name': None, + 'data_name': None, + 'initial_snapshots': None, + 'do_discrete_nuisance': True, + 'do_fix_cache': True, + 'do_fix_multi': True, + 'do_binned_likelihood': True, + 'interpolation_code': -1, + 'tag_as_measurement': None, + } + _DEFAULT_CONTENT_ = { WSArgument.PDF : kClassName|kName|kArgs, WSArgument.FUNCTION : kClassName|kName|kArgs @@ -63,23 +78,28 @@ class ExtendedModel(AbstractObject): data_name:Optional[str]="combData", snapshot_name:Optional[Union[List[str], str]]=None, binned_likelihood:bool=True, tag_as_measurement:Optional[str]=None, fix_cache:bool=True, fix_multi:bool=True, interpolation_code:int=-1, - load_extension:bool=True, minimizer_cls=None, verbosity:Optional[Union[int, str]]="INFO"): - super().__init__(verbosity=verbosity) - self.filename = filename - self.ws_name = ws_name - self.mc_name = mc_name - self.data_name = data_name - self.initial_snapshots = snapshot_name - self.binned_likelihood = binned_likelihood - self.tag_as_measurement = tag_as_measurement - self.fix_cache = fix_cache - self.fix_multi = fix_multi - self.interpolation_code = interpolation_code - self.last_fit_status = None - if minimizer_cls is None: - self.minimizer_cls = ExtendedMinimizer - else: - self.minimizer_cls = minimizer_cls + discrete_nuisance:bool=True, load_extension:bool=True, minimizer=None, + verbosity:Optional[Union[int, str]]="INFO"): + super().__init__(verbosity=verbosity) + + self.set_minimizer(minimizer) + + config = { + 'ws_name': ws_name, + 'mc_name': mc_name, + 'data_name': data_name, + 'initial_snapshots': snapshot_name, + 'do_discrete_nuisance': discrete_nuisance, + 'do_fix_cache': fix_cache, + 'do_fix_multi': fix_multi, + 'do_binned_likelihood': binned_likelihood, + 'interpolation_code': interpolation_code, + 'tag_as_measurement': tag_as_measurement + } + + self.config = combine_dict(self._DEFAULT_CONFIGS_, config) + # avoid copying RooWorkspace instance + self.config['filename'] = filename quickstats.load_corelib() if load_extension: @@ -118,25 +138,17 @@ class ExtendedModel(AbstractObject): def observables(self): return self._observables @property + def discrete_nuisance(self): + return self._discrete_nuisance + @property def category(self): return self._category @property def floating_auxiliary_variables(self): return self._floating_auxiliary_variables @property - def initial_snapshots(self): - return self._initial_snapshots - - @initial_snapshots.setter - def initial_snapshots(self, val): - if val is None: - self._initial_snapshots = [] - elif isinstance(val, str): - self._initial_snapshots = split_str(val, sep=',', remove_empty=True) - elif isinstance(val, list): - self._initial_snapshots = val - else: - raise ValueError('"initial_snapshots" must be string or list of strings') + def minimizer(self): + return self._minimizer @semistaticmethod def load_extension(self): @@ -194,59 +206,69 @@ class ExtendedModel(AbstractObject): self.stdout.info(f'Deactivated level 2 constant term optimization for {name}') def initialize(self): - if isinstance(self.filename, str): - if not os.path.exists(self.filename): - raise FileNotFoundError(f'workspace file "{self.filename}" does not exist') - self.stdout.info(f'Opening file "{self.filename}"') - file = ROOT.TFile(self.filename) + filename = 
self.config['filename'] + ws_name = self.config['ws_name'] + mc_name = self.config['mc_name'] + if isinstance(filename, str): + if not os.path.exists(filename): + raise FileNotFoundError(f'workspace file "{filename}" does not exist') + self.stdout.info(f'Opening file "{filename}"') + file = ROOT.TFile(filename) if (not file): - raise RuntimeError(f"Something went wrong while loading the root file: {self.filename}") + raise RuntimeError(f"Something went wrong while loading the root file: {filename}") + # remove read cache + close_read_cache(file) # load workspace - if self.ws_name is None: + if ws_name is None: ws_names = [i.GetName() for i in file.GetListOfKeys() if i.GetClassName() == 'RooWorkspace'] if not ws_names: - raise RuntimeError(f"No workspaces found in the root file: {self.filename}") + raise RuntimeError(f"No workspaces found in the root file: {filename}") if len(ws_names) > 1: self.stdout.warning("Found multiple workspace instances from the root file: " f"{self.filename}. Available workspaces are \"{','.join(ws_names)}\". " f"Will choose the first one by default") - self.ws_name = ws_names[0] - ws = file.Get(self.ws_name) - elif isinstance(self.filename, ROOT.RooWorkspace): + ws_name = ws_names[0] + ws = file.Get(ws_name) + elif isinstance(filename, ROOT.RooWorkspace): file = None - ws = self.filename + ws = filename if not ws: - raise RuntimeError(f'failed to load workspace "{self.ws_name}"') - self.ws_name = ws.GetName() - self.stdout.info(f'Loaded workspace "{self.ws_name}"') + raise RuntimeError(f'failed to load workspace "{ws_name}"') + ws_name = ws.GetName() + self.config['ws_name'] = ws_name + self.stdout.info(f'Loaded workspace "{ws_name}"') # load model config - if self.mc_name is None: + if mc_name is None: mc_names = [i.GetName() for i in ws.allGenericObjects() if 'ModelConfig' in i.ClassName()] if not mc_names: - raise RuntimeError(f"no ModelConfig object found in the workspace: {self.ws_name}") + raise RuntimeError(f"no ModelConfig instance found in the workspace \"{ws_name}\"") if len(mc_names) > 1: - self.stdout.warning(f"Found multiple ModelConfig instances from the workspace: {self.ws_name}. " - f"Available ModelConfigs are \"{','.join(mc_names)}\". " - "Will choose the first one by default") - self.mc_name = mc_names[0] - model_config = ws.obj(self.mc_name) + mc_names_str = ', '.join([f'"{mc_name}"' for mc_name in mc_names]) + self.stdout.warning(f"Found multiple ModelConfig instances in the workspace \"{ws_name}\". " + f"Available ModelConfigs are {mc_names_str}. 
" + "Will choose the first one by default.") + mc_name = mc_names[0] + model_config = ws.obj(mc_name) + self.config['mc_name'] = mc_name if not model_config: - raise RuntimeError(f'failed to load model config "{self.mc_name}"') - self.stdout.info(f'Loaded model config "{self.mc_name}"') + raise RuntimeError(f'failed to load model config "{mc_name}"') + self.stdout.info(f'Loaded model config "{mc_name}"') # modify interpolation code - if self.interpolation_code != -1: - self.modify_interp_codes(ws, self.interpolation_code, + interpolation_code = self.config['interpolation_code'] + if interpolation_code != -1: + self.modify_interp_codes(ws, interpolation_code, classes=[ROOT.RooStats.HistFactory.FlexibleInterpVar, ROOT.PiecewiseInterpolation]) # activate binned likelihood - if self.binned_likelihood: + if self.config['do_binned_likelihood']: self.activate_binned_likelihood(ws) # set main measurement - if self.tag_as_measurement: - self.set_measurement(ws, condition=lambda name: name.startswith(self.tag_as_measurement)) + tag_as_measurement = self.config['tag_as_measurement'] + if tag_as_measurement: + self.set_measurement(ws, condition=lambda name: name.startswith(tag_as_measurement)) # deactivate level 2 constant term optimization self.deactivate_lv2_const_optimization(ws, @@ -259,19 +281,22 @@ class ExtendedModel(AbstractObject): self.stdout.info(f'Loaded model pdf "{pdf.GetName()}" from model config') # load dataset - if self.data_name is None: + data_name = self.config['data_name'] + if data_name is None: data_names = [i.GetName() for i in ws.allData()] if not data_names: - raise RuntimeError(f"no datasets found in the workspace: {ws.GetName()}") - self.data_name = data_names[0] - data = ws.data(self.data_name) + raise RuntimeError(f"no datasets found in the workspace: {ws_name}") + data_name = data_names[0] + data = ws.data(data_name) # in case there is a bug in hash map if not data: - data = [i for i in ws.allData() if i.GetName() == self.data_name] + data = [i for i in ws.allData() if i.GetName() == data_name] if not data: - raise RuntimeError(f'failed to load dataset "{self.data_name}"') + raise RuntimeError(f'failed to load dataset "{data_name}"') data = data[0] - self.stdout.info(f'Loaded dataset "{data.GetName()}" from workspace') + data_name = data.GetName() + self.config['data_name'] = data_name + self.stdout.info(f'Loaded dataset "{data_name}" from workspace') # load nuisance parameters nuisance_parameters = model_config.GetNuisanceParameters() @@ -315,6 +340,18 @@ class ExtendedModel(AbstractObject): else: category = None category_pdfs = None + + # check channel masks (for RooSimultaneousOpt) + if (pdf.InheritsFrom("RooSimultaneousOpt") or hasattr(pdf, 'channelMasks')): + channel_masks = pdf.channelMasks() + n_channel_masks = len(channel_masks) + if (n_channel_masks > 0): + active_masks = [m for m in channel_masks if m.getVal() > 0] + n_active_masks = len(active_masks) + self.stdout.info(f'{n_active_masks} out of {n_channel_masks} channels masked') + if self.stdout.verbosity <= "INFO": + self.stdout.info('Channel masks:') + channel_masks.Print() self._file = file self._workspace = ws @@ -328,12 +365,34 @@ class ExtendedModel(AbstractObject): self._observables = observables self._category = category self._floating_auxiliary_variables = None - - # Load snapshots - self.load_snapshots(self.initial_snapshots) + + self.last_fit_status = None + + # parse initial snapshots + initial_snapshots = self.config['initial_snapshots'] + if initial_snapshots is None: + initial_snapshots = [] + 
elif isinstance(initial_snapshots, str): + initial_snapshots = split_str(initial_snapshots, sep=',', remove_empty=True) + elif isinstance(initial_snapshots, list): + initial_snapshots = list(initial_snapshots) + else: + raise ValueError('"initial_snapshots" must be string or list of strings') + self.config['initial_snapshots'] = initial_snapshots + # load snapshots + self.load_snapshots(initial_snapshots) + + # discrete nuisance + if self.config['do_discrete_nuisance']: + self._discrete_nuisance = DiscreteNuisance(ws, pdf, verbosity=self.stdout.verbosity) + else: + self._discrete_nuisance = DiscreteNuisance(verbosity=self.stdout.verbosity) RooMsgService.remove_topics() return None + + def set_minimizer(self, minimizer:Optional["ExtendedMinimizer"]=None): + self._minimizer = minimizer def set_parameters(self, param_setup: Optional[Union[str, Sequence, Dict]] = None, params: Optional["ROOT.RooArgSet"] = None, @@ -491,6 +550,9 @@ class ExtendedModel(AbstractObject): raise ValueError(f'Object "{name}" not found in the workspace') return obj + def has_discrete_nuisance(self): + return self.discrete_nuisance.has_discrete_nuisance() + @staticmethod def randomize_globs(pdf:ROOT.RooAbsPdf, globs:ROOT.RooArgSet, seed:int): """Randomize values of global observables (for generating pseudo-experiments) @@ -1003,24 +1065,30 @@ class ExtendedModel(AbstractObject): if do_fit: if dataset is None: dataset = self.data - minimizer = self.minimizer_cls("Minimizer", self.pdf, dataset, workspace=self.workspace) - if minimizer_options is None: - minimizer_options = combine_dict(minimizer._DEFAULT_MINIMIZER_OPTION_) - if nll_options is None: - nll_options = combine_dict(minimizer._DEFAULT_NLL_OPTION_) - minimizer.configure(**minimizer_options) - if constraint_option == 0: - pass - elif constraint_option == 1: - self.set_constrained_nuisance_parameters_to_nominal() - else: - raise ValueError(f"unsupported constraint option: {constraint_option}") - if isinstance(nll_options, dict): - minimizer.configure_nll(**nll_options) - elif isinstance(nll_options, list): - minimizer.set_nll_commands(nll_options) + # create new instance of minimizer + if self.minimizer is None: + minimizer = ExtendedMinimizer("Minimizer", self.pdf, dataset, workspace=self.workspace) + if minimizer_options is None: + minimizer_options = combine_dict(minimizer._DEFAULT_MINIMIZER_OPTION_) + if nll_options is None: + nll_options = combine_dict(minimizer._DEFAULT_NLL_OPTION_) + minimizer.configure(**minimizer_options) + if constraint_option == 0: + pass + elif constraint_option == 1: + self.set_constrained_nuisance_parameters_to_nominal() + else: + raise ValueError(f"unsupported constraint option: {constraint_option}") + if isinstance(nll_options, dict): + minimizer.configure_nll(**nll_options) + elif isinstance(nll_options, list): + minimizer.set_nll_commands(nll_options) + else: + raise ValueError("unsupported nll options format") else: - raise ValueError(f"unsupported nll options format") + self.minimizer.set_pdf(self.pdf) + self.minimizer.set_data(dataset) + minimizer = self.minimizer if poi_set is not None: uncond_fit = poi_profile is None @@ -1071,7 +1139,7 @@ class ExtendedModel(AbstractObject): mu_cond=format_mu_str(poi_profile)) else: asimov_data_name = names['asimov_no_poi'] - + if method == "old": channel_asimov_data_name = names['channel_asimov'] sim_pdf = self.pdf @@ -1285,15 +1353,18 @@ class ExtendedModel(AbstractObject): cat_label = cat_data.first cat_index = cat_data.second pdf_cat = pdf.getPdf(cat_label) - obs = 
pdf_cat.getObservables(self.observables) - target_obs = obs.first() + observables = pdf_cat.getObservables(self.observables) category_map[cat_label] = {} category_map[cat_label]['index'] = cat_index category_map[cat_label]['pdf'] = pdf_cat.GetName() - category_map[cat_label]['observable'] = target_obs.GetName() - bin_range = target_obs.getRange() - category_map[cat_label]['bin_range'] = (bin_range.first, bin_range.second) - category_map[cat_label]['bins'] = target_obs.getBins() + category_map[cat_label]['observables'] = {} + for observable in observables: + obs_name = observable.GetName() + bin_range = observable.getRange() + category_map[cat_label]['observables'][obs_name] = { + 'bin_range': (bin_range.first, bin_range.second), + 'bins': observable.getBins() + } return category_map def get_categories(self): @@ -1303,10 +1374,13 @@ class ExtendedModel(AbstractObject): category_map = self.get_category_map() binnings = {} for category in category_map: + if len(category_map[category]['observables']) > 1: + raise RuntimeError('rebinning of multi-observable category is not supported') + orig_binnings = next(iter(category_map[category]['observables'].values())) binnings[category] = {} - bin_range = category_map[category]['bin_range'] + bin_range = orig_binnings['bin_range'] if bins is None: - _bins = category_map[category]['bins'] + _bins = orig_binnings['bins'] elif isinstance(bins, dict): _bins = bins.get(category, None) if _bins is None: @@ -1332,7 +1406,11 @@ class ExtendedModel(AbstractObject): is_binned = {} for cat, cat_data in category_map.items(): distributions[cat] = {} - observable = category_map[cat]['observable'] + observables = category_map[cat]['observables'] + if len(observables) > 1: + raise RuntimeError('found multi-observable category') + observable = next(iter(observables)) + observable_binnings = next(iter(observables.values())) if observable not in dataset_values: raise RuntimeError(f"no data associated with the observable \"{observable}\" found in the dataset") distributions[cat]['observable'] = observable @@ -1342,7 +1420,7 @@ class ExtendedModel(AbstractObject): ind = np.argsort(x) x = x[ind] y = dataset_values['weight'][mask][ind] - default_bins = category_map[cat]['bins'] + default_bins = observable_binnings['bins'] ghost = False if np.all(y == 1.): binned = False @@ -1633,7 +1711,10 @@ class ExtendedModel(AbstractObject): binnings['all_cat'] = combine_dict(binnings[categories[0]]) for i, category in enumerate(categories): - obs_name = category_map[category]['observable'] + observables = category_map[category]['observables'] + if len(observables) > 1: + raise RuntimeError('multi-observable category is not supported') + obs_name = next(iter(observables)) if discriminant is None: xlabel = obs_name else: @@ -1694,6 +1775,8 @@ class ExtendedModel(AbstractObject): raise ValueError(f"the collection \"{variable_type}\" does not contain members of the type \"RooRealVar\"") if resolved_vtype == WSArgument.VARIABLE: variables = ROOT.RooArgSet(self.workspace.allVars()) + if self.has_discrete_nuisance(): + variables.add(self.discrete_nuisance.multipdf_cats) elif resolved_vtype == WSArgument.OBSERVABLE: if isinstance(self.pdf, ROOT.RooSimultaneous): variables = self.observables.Clone() @@ -1738,6 +1821,8 @@ class ExtendedModel(AbstractObject): if self.floating_auxiliary_variables is None: self._load_floating_auxiliary_variables() variables.add(self.floating_auxiliary_variables) + if self.has_discrete_nuisance(): + variables.add(self.discrete_nuisance.multipdf_cats) else: raise 
ValueError(f"unknown variable type \"{variable_type}\"") if sort: @@ -1801,11 +1886,30 @@ class ExtendedModel(AbstractObject): @staticmethod def _format_category_summary(category_name:str, category_map:Dict): - observable = category_map['observable'] - bin_range = category_map['bin_range'] - rmin, rmax = get_rmin_rmax(bin_range) - bins = category_map['bins'] - summary_str = f"{category_name} (observable = {observable}, range = [{rmin}, {rmax}], bins = {bins})" + all_names = [] + all_ranges = [] + all_bins = [] + for obs_name in category_map['observables']: + all_names.append(obs_name) + bin_range = category_map['observables'][obs_name]['bin_range'] + rmin, rmax = get_rmin_rmax(bin_range) + all_ranges.append(f'[{rmin}, {rmax}]') + bins = category_map['observables'][obs_name]['bins'] + all_bins.append(f'{bins}') + if len(all_names) == 1: + summary_str = f"{category_name} (observable = {all_names[0]}, range = {all_ranges[0]}, bins = {all_bins[0]})" + else: + summary_str = (f"{category_name} (observables = {'(' + ', '.join(all_names) + ')'}, " + f"ranges = {'(' + ', '.join(all_ranges) + ')'}, " + f"bins = {'(' + ', '.join(all_bins) + ')'})") + return summary_str + + @staticmethod + def _format_multipdf_summary(multipdf, multipdf_cat): + multipdf_name = multipdf.GetName() + category_name = multipdf_cat.GetName() + num_cat = len(multipdf_cat) + summary_str = f"{multipdf_name} (index_cat = {category_name}, num_pdfs = {num_cat})" return summary_str def print_summary(self, items:Optional[List]=None, suppress_print:bool=False, detailed:bool=True, @@ -1813,7 +1917,7 @@ class ExtendedModel(AbstractObject): save_as:Optional[str]=None): if items is None: items = ['workspace', 'dataset', 'snapshot', 'category', - 'poi', 'detailed_nuisance_parameter'] + 'poi', 'detailed_nuisance_parameter', 'multipdf'] summary_str = "" # workspace if 'workspace' in items: @@ -1840,6 +1944,14 @@ class ExtendedModel(AbstractObject): summary_str += f"Categories ({n_cat}):\n" for category in category_map: summary_str += "\t" + self._format_category_summary(category, category_map[category]) + "\n" + # multi pdfs + if 'multipdf' in items: + multipdf_cats = self.discrete_nuisance.multipdf_cats + multipdfs = self.discrete_nuisance.multipdfs + n_multipdf = len(multipdfs) + summary_str += f"MultiPdfs ({n_multipdf}):\n" + for multipdf, multipdf_cat in zip(multipdfs, multipdf_cats): + summary_str += "\t" + self._format_multipdf_summary(multipdf, multipdf_cat) + "\n" # pois, NPs param_strs = [] param_sets = [] @@ -1971,4 +2083,9 @@ class ExtendedModel(AbstractObject): if isinstance(variables, (str, WSArgument)): variables = self.get_variables(variables) self.workspace.saveSnapshot(snapshot_name, variables) - self.stdout.info(f'Saved snapshot "{snapshot_name}"') \ No newline at end of file + self.stdout.info(f'Saved snapshot "{snapshot_name}"') + + + def todo(self): + # remove NPs from fall back discrete pdf + pass \ No newline at end of file diff --git a/quickstats/components/likelihood.py b/quickstats/components/likelihood.py index 91e7b5af85680d45d75fa9ce2da70efc50869b9a..45db367bfaffd4f7823eed8a08c4fc643cf06d64 100644 --- a/quickstats/components/likelihood.py +++ b/quickstats/components/likelihood.py @@ -12,7 +12,7 @@ from quickstats import DescriptiveEnum from quickstats.maths.numerics import pretty_value from quickstats.components.basics import WSArgument from quickstats.components import AnalysisObject -from quickstats.utils.common_utils import parse_config +from quickstats.utils.common_utils import parse_config, combine_dict 
from quickstats.parsers import RooParamSetupParser class FitMode(DescriptiveEnum): @@ -24,11 +24,14 @@ class FitMode(DescriptiveEnum): class Likelihood(AnalysisObject): - def __init__(self, filename:str, poi_name:Optional[Union[str, List[str]]]=None, + def __init__(self, filename:str, + poi_name:Optional[Union[str, List[str]]]=None, data_name:str='combData', config:Optional[Dict]=None, - verbosity:Optional[Union[int, str]]="INFO"): + verbosity:Optional[Union[int, str]]="INFO", + **kwargs): config = parse_config(config) + config = combine_dict(config, kwargs) config['filename'] = filename config['poi_name'] = poi_name config['data_name'] = data_name @@ -73,6 +76,10 @@ class Likelihood(AnalysisObject): else: combined_fit_result['significance'] = None combined_fit_result['pvalue'] = None + if (uncond_fit_result['status'] != 0) or (cond_fit_result['status'] != 0): + combined_fit_result['status'] = -1 + else: + combined_fit_result['status'] = 0 return combined_fit_result def nll_fit(self, poi_val:Optional[Union[Dict[str, float], float]]=None, diff --git a/quickstats/components/workspaces/ws_decomposer.py b/quickstats/components/workspaces/ws_decomposer.py index 68b7c03a79e42e55ebf8f6fc2f614a18dd3b557a..c95b7aa357db84e5a38b0a566714c036b8ec1cb5 100644 --- a/quickstats/components/workspaces/ws_decomposer.py +++ b/quickstats/components/workspaces/ws_decomposer.py @@ -100,7 +100,6 @@ class WSDecomposer(XMLWSBase): with Timer() as t: model = ExtendedModel(infile, data_name=None, verbosity="WARNING") orig_cat = model.pdf.indexCat() - category_map = model.get_category_map() categories = self.parse_categories(category_expr, orig_cat) snapshots = self.parse_snapshots(model.workspace, snapshots_to_save) if not categories: @@ -129,7 +128,7 @@ class WSDecomposer(XMLWSBase): pois_cat = param_cat.selectCommon(model.pois) globs_cat = param_cat.selectCommon(model.global_observables) if rebuild_nuis: - nuis_cat = ROOT.RFUtils.SelectConstantArgs(param_cat, False) + nuis_cat = ROOT.RFUtils.GetConstantParameters(param_cat, False) nuis_cat.remove(pois_cat, False, True) nuis_cat.remove(globs_cat, False, True) else: diff --git a/quickstats/components/workspaces/xml_ws_combiner.py b/quickstats/components/workspaces/xml_ws_combiner.py index e1c76661765df54f524b41510e48b9189e544b25..5a630ceaeb34124850200f564f2cbcb9ad0e203f 100644 --- a/quickstats/components/workspaces/xml_ws_combiner.py +++ b/quickstats/components/workspaces/xml_ws_combiner.py @@ -300,7 +300,7 @@ class XMLWSCombiner(XMLWSBase): channel_config = self.channel_config[channel] model = self._get_channel_model(channel_config) strict_mode = self.combination_config["strict"] - ws_filename = model.filename + ws_filename = model.config['filename'] renamed_pdfs = list(channel_config["rename_map"]["pdf"]) renamed_vars = list(channel_config["rename_map"]["var"]) renamed_pois = list(channel_config["poi_map"].values()) @@ -317,8 +317,8 @@ class XMLWSCombiner(XMLWSBase): self._exception_check(msg, strict_mode) continue if not isinstance(pdf, ROOT.RooGaussian): - msg = f"constraint PDF \"{pdf_name}\" that is not an instance of RooGaussian is not supported" - self._exception_check(msg, strict_mode) + #msg = f"constraint PDF \"{pdf_name}\" that is not an instance of RooGaussian is not supported" + #self._exception_check(msg, strict_mode) continue nuis = model.workspace.var(nuis_name) if not nuis: diff --git a/quickstats/concurrent/__init__.py b/quickstats/concurrent/__init__.py index 0fee6f6ad0fc8ef53b64dd9fbe448e89279ea398..734a345171393363079213d3e81116a02ee26415 100644 
--- a/quickstats/concurrent/__init__.py +++ b/quickstats/concurrent/__init__.py @@ -2,4 +2,5 @@ from .abstract_runner import AbstractRunner from .parameterised_runner import ParameterisedRunner from .parameterised_asymptotic_cls import ParameterisedAsymptoticCLs from .parameterised_likelihood import ParameterisedLikelihood +from .parameterised_significance import ParameterisedSignificance from .nuisance_parameter_ranking_runner import NuisanceParameterRankingRunner \ No newline at end of file diff --git a/quickstats/concurrent/abstract_runner.py b/quickstats/concurrent/abstract_runner.py index ca84390c6750ffd994781674f6b769fca2df6817..494693f597aabe654f90a354ca064ac107f2e334 100644 --- a/quickstats/concurrent/abstract_runner.py +++ b/quickstats/concurrent/abstract_runner.py @@ -10,7 +10,7 @@ class AbstractRunner(AbstractObject): def config(self): return self._config - def __init__(self, parallel:int=-1, timed:bool=True, + def __init__(self, parallel:int=-1, save_log:bool=True, cache:bool=True, verbosity:Optional[Union[int, str]]="INFO"): super().__init__(verbosity=verbosity) @@ -18,8 +18,7 @@ class AbstractRunner(AbstractObject): self._config = { 'cache': cache, 'parallel': parallel, - 'save_log': save_log, - 'timed': timed + 'save_log': save_log } def _prerun_batch(self): @@ -39,17 +38,19 @@ class AbstractRunner(AbstractObject): def _end_of_instance_cleanup(self): pass - + + @semistaticmethod def get_instance_outpath(self, kwargs:Dict): outpath = kwargs.get("outname", None) return outpath - + + @semistaticmethod def get_instance_logpath(self, kwargs:Dict): outpath = self.get_instance_outpath(kwargs) if outpath: return os.path.splitext(outpath)[0] + ".log" return None - + def _is_valid_cache(self, cached_result): return True @@ -68,7 +69,7 @@ class AbstractRunner(AbstractObject): pass logpath = self.get_instance_logpath(kwargs) - if logpath and self.config['save_log']: + if (logpath and self.config['save_log']) and (self.stdout.verbosity != 'DEBUG'): with standard_log(logpath) as logger: result = self._run_instance(**kwargs) else: @@ -84,8 +85,7 @@ class AbstractRunner(AbstractObject): self._prerun_batch() raw_result = execute_multi_tasks(self.run_instance, argument_list, parallel=parallel) results = self.postprocess(raw_result, auxiliary_args) - if self.config['timed']: - self.stdout.info(f'All jobs have finished. Total time taken: {t.interval:.3f} s') + self.stdout.info(f'All jobs have finished. 
Total time taken: {t.interval:.3f} s')
         return results
 
     def run(self):
diff --git a/quickstats/concurrent/parameterised_asymptotic_cls.py b/quickstats/concurrent/parameterised_asymptotic_cls.py
index 2b36a6cac6f2173a1758a2762eb59677c0149e45..ef7d1cd1e80e5c3b774ea61b3a2091e9f9ae40cd 100644
--- a/quickstats/concurrent/parameterised_asymptotic_cls.py
+++ b/quickstats/concurrent/parameterised_asymptotic_cls.py
@@ -1,6 +1,5 @@
 import os
 import sys
-import copy
 import json
 from typing import Optional, Union, Dict, List, Any
 from itertools import repeat
@@ -8,7 +7,7 @@ from itertools import repeat
 from quickstats import semistaticmethod
 from quickstats.parsers import ParamParser
 from quickstats.concurrent import ParameterisedRunner
-from quickstats.utils.common_utils import batch_makedirs, json_load
+from quickstats.utils.common_utils import batch_makedirs, json_load, combine_dict, save_as_json
 from quickstats.components import AsymptoticCLs
 
 class ParameterisedAsymptoticCLs(ParameterisedRunner):
@@ -22,22 +21,16 @@ class ParameterisedAsymptoticCLs(ParameterisedRunner):
         super().__init__(file_expr=file_expr, param_expr=param_expr,
                          filter_expr=filter_expr, exclude_expr=exclude_expr,
-                         parallel=parallel, timed=True, save_log=save_log,
+                         parallel=parallel, save_log=save_log,
+                         outdir=outdir, cachedir=cachedir,
                          cache=cache, allow_none=False, verbosity=verbosity)
-        self.attributes = {
+        self.attributes.update({
             'input_path': input_path,
             'config': config,
-            'outdir': outdir,
-            'cachedir': cachedir,
             'outname': outname,
             'save_summary': save_summary
-        }
-
-    def _prerun_batch(self):
-        outdir = self.attributes['outdir']
-        cache_dir = self.get_cache_dir()
-        batch_makedirs([outdir, cache_dir])
+        })
 
     @semistaticmethod
     def _prerun_instance(self, filename:str, parameters:Optional[Dict[str, Any]]=None, **kwargs):
@@ -46,13 +39,7 @@ class ParameterisedAsymptoticCLs(ParameterisedRunner):
         else:
             param_str = ""
         self.stdout.info(f"Evaluating limit for the workspace {filename} {param_str}")
-
-    @semistaticmethod
-    def _cached_return(self, outname:str):
-        with open(outname, 'r') as f:
-            limits = json_load(f)
-        return limits
-
+
     @semistaticmethod
     def _run_instance(self, filename:str, config:Dict[str, Any],
                       parameters:Dict[str, Any],
@@ -61,6 +48,7 @@ class ParameterisedAsymptoticCLs(ParameterisedRunner):
                       **kwargs):
         try:
             config['filename'] = filename
+            config.update(kwargs)
             asymptotic_cls = AsymptoticCLs(**config)
             asymptotic_cls.evaluate_limits()
             if outname is not None:
@@ -70,9 +58,6 @@ class ParameterisedAsymptoticCLs(ParameterisedRunner):
             sys.stdout.write(f"{e}\n")
             return {}
 
-    def get_cache_dir(self):
-        return os.path.join(self.attributes['outdir'], self.attributes['cachedir'])
-
     def prepare_task_inputs(self):
         input_path = self.attributes['input_path']
         cache_dir = self.get_cache_dir()
@@ -82,20 +67,12 @@
 
         configs = []
         base_config = self.attributes['config']
         if base_config is None:
             base_config = {}
         for param_point in param_points:
-            new_config = copy.deepcopy(base_config)
-            int_params = param_point['internal_parameters']
-            val_expr = ParamParser.val_encode_parameters(int_params)
-            base_fix_param = new_config.get("fix_param", None)
-            fix_param = []
-            if base_fix_param:
-                fix_param.append(base_fix_param)
-            if val_expr:
-                fix_param.append(val_expr)
-            new_config['fix_param'] = ",".join(fix_param)
-            configs.append(new_config)
+            base_fix_params = base_config.get("fix_param", None)
+            internal_params = param_point['internal_parameters']
+            fix_param_expr = self.join_param_setup(base_fix_params, internal_params)
+            config = combine_dict(base_config, {"fix_param": fix_param_expr})
+            configs.append(config)
         param_dpd_kwargs = {
             'filename': param_data['filenames'],
             'config': configs,
@@ -103,7 +80,8 @@
             'parameters': param_data['parameters']
         }
         param_ind_kwargs = {
-            'save_summary': self.attributes['save_summary']
+            'save_summary' : self.attributes['save_summary'],
+            'verbosity' : self.attributes['verbosity']
         }
         self.set_param_ind_kwargs(**param_ind_kwargs)
         self.set_param_dpd_kwargs(**param_dpd_kwargs)
@@ -132,12 +110,11 @@
             final_result.append({**params, **limit})
         import pandas as pd
         final_result = pd.DataFrame(final_result).to_dict('list')
-
+
+        base_config = self.attributes['config'] or {}
+        poi_name = base_config.get('poi_name', None)
         outdir = self.attributes['outdir']
-        outname = self.attributes['outname']
+        outname = self.attributes['outname'].format(poi_name=poi_name)
         if outname is not None:
             outpath = os.path.join(outdir, outname)
-            with open(outpath, "w") as file:
-                json.dump(final_result, file, indent=2)
-                file.truncate()
\ No newline at end of file
+            save_as_json(final_result, outpath)
\ No newline at end of file
diff --git a/quickstats/concurrent/parameterised_likelihood.py b/quickstats/concurrent/parameterised_likelihood.py
index 7b449b81a5bb31f8c8988e1f726f25ea098c5b88..9ea52d6b33e2537866d418bfe5caf24551d663f2 100644
--- a/quickstats/concurrent/parameterised_likelihood.py
+++ b/quickstats/concurrent/parameterised_likelihood.py
@@ -10,12 +10,12 @@ import ROOT
 from quickstats import semistaticmethod
 from quickstats.parsers import ParamParser
 from quickstats.concurrent import ParameterisedRunner
-from quickstats.utils.common_utils import batch_makedirs
-from quickstats.maths.numerics import pretty_value
+from quickstats.utils.common_utils import batch_makedirs, save_as_json
 from quickstats.components import Likelihood
 
 class ParameterisedLikelihood(ParameterisedRunner):
-    def __init__(self, input_file:str, param_expr:str,
+
+    def __init__(self, input_path:str, param_expr:str,
                  file_expr:Optional[str]=None, filter_expr:Optional[str]=None,
                  exclude_expr:Optional[str]=None, data_name:str="combData",
                  uncond_snapshot:Optional[str]=None, config:Optional[Dict]=None,
                  outdir:str="output", cachedir:str="cache",
@@ -23,20 +23,19 @@
                  save_log:bool=True, parallel:int=-1, allow_nan:bool=True,
                  verbosity:Optional[Union[int, str]]="INFO"):
-        super().__init__(file_expr=None, param_expr=param_expr,
+        super().__init__(file_expr=file_expr, param_expr=param_expr,
                          filter_expr=filter_expr, exclude_expr=exclude_expr,
-                         parallel=parallel, timed=True, save_log=save_log,
+                         parallel=parallel, save_log=save_log,
+                         outdir=outdir, cachedir=cachedir,
                          cache=cache, allow_none=True, verbosity=verbosity)
-
-        self.attributes = {
-            'input_file': input_file,
+
+        self.attributes.update({
+            'input_path': input_path,
             'data_name': data_name,
             'uncond_snapshot': uncond_snapshot,
             'config': config,
-            'outdir': outdir,
-            'cachedir': cachedir,
             'outname': outname
-        }
+        })
 
         self.allow_nan = allow_nan
 
@@ -45,9 +44,7 @@
     def _prerun_batch(self):
         self.stdout.tips("When running likelihood scan on an Asimov dataset, remember to restore the global "
                          "observables to their Asimov values by loading the appropriate snapshot.")
-        outdir = self.attributes['outdir']
-        cache_dir = self.get_cache_dir()
-        batch_makedirs([outdir,
cache_dir]) + super()._prerun_batch() @semistaticmethod def _prerun_instance(self, filename:str, mode:int, poi_val:Optional[Union[float, Dict[str, float]]]=None, **kwargs): @@ -57,13 +54,6 @@ class ParameterisedLikelihood(ParameterisedRunner): elif mode == 1: self.stdout.info(f"Evaluating unconditional NLL for the workspace {filename}") - @semistaticmethod - def _cached_return(self, outname:str): - with open(outname, 'r') as f: - result = json.load(f) - processed_result = self._process_result(result) - return processed_result - def _is_valid_cache(self, cached_result): if (not self.allow_nan) and math.isnan(cached_result['nll']): self.stdout.info('Found cached result with nan nll. Retrying') @@ -107,10 +97,10 @@ class ParameterisedLikelihood(ParameterisedRunner): raise RuntimeError(error_msg) if config is None: config = {} - verbosity = config.pop("verbosity", "INFO") + config.update(kwargs) do_minos = config.pop("do_minos", False) likelihood = Likelihood(filename=filename, poi_name=poi_name, data_name=data_name, - config=config, verbosity=verbosity) + **config) fit_result = likelihood.nll_fit(poi_val, mode=mode, do_minos=do_minos, snapshot_name=snapshot_name) # save results @@ -124,9 +114,6 @@ class ParameterisedLikelihood(ParameterisedRunner): except Exception as e: sys.stdout.write(f"{e}\n") return None - - def get_cache_dir(self): - return os.path.join(self.attributes['outdir'], self.attributes['cachedir']) def prepare_task_inputs(self): @@ -134,47 +121,61 @@ class ParameterisedLikelihood(ParameterisedRunner): if len(poi_names) == 0: raise RuntimeError("no POI(s) to scan for") - input_file = self.attributes['input_file'] + input_path = self.attributes['input_path'] cache_dir = self.get_cache_dir() outname = "{param_str}.json" - param_points = self.get_param_points(input_file) + param_points = self.get_param_points(input_path) + n_param_points = len(param_points) param_data = self.get_serialised_param_data(param_points, outdir=cache_dir, outname=outname) - + + filenames = param_data['filenames'] + if not filenames: + raise RuntimeError(f"no input file found matching the expression: {input_path}") + + external_param_points = self.get_external_param_points(input_path) + unique_filenames = [] + outnames = [] + poi_names_text = "_".join(poi_names) + for param_point in external_param_points: + unique_filenames.append(param_point['filename']) + param_str = ParamParser.str_encode_parameters(param_point['parameters']) + if param_str: + basename = f"{param_str}_{poi_names_text}_uncond" + else: + basename = f"{poi_names_text}_uncond" + outnames.append(os.path.join(cache_dir, basename)) + + n_unique_files = len(unique_filenames) + if self.attributes['uncond_snapshot'] is None: # None is for unconditional NLL - poi_values = [None] + poi_values = [None] * n_unique_files # 1 is for unconditional NLL, 2 is for conditional NLL - modes = [1] + [2]*len(param_points) - snapshot_names = [Likelihood.kCurrentSnapshotName] * (1 + len(param_points)) + modes = [1] * n_unique_files + [2] * n_param_points + snapshot_names = [Likelihood.kCurrentSnapshotName] * (n_unique_files + n_param_points) else: - poi_values = [None] - modes = [4] + [2]*len(param_points) + poi_values = [None] * n_unique_files + modes = [4] * n_unique_files + [2] * n_param_points uncond_snapshot_name = self.attributes['uncond_snapshot'] - snapshot_names = [uncond_snapshot_name] + [Likelihood.kCurrentSnapshotName] * len(param_points) + snapshot_names = ([uncond_snapshot_name] * n_unique_files + + [Likelihood.kCurrentSnapshotName] * 
n_param_points) for param_point in param_points: poi_values.append(param_point['internal_parameters']) - outname_uncond = os.path.join(cache_dir, "{}_uncond.json".format("_".join(poi_names))) param_dpd_kwargs = { - 'poi_val': poi_values, - 'mode': modes, - 'snapshot_name': snapshot_names, - 'outname': [outname_uncond] + param_data['outnames'] + 'filename' : unique_filenames + filenames, + 'poi_val' : poi_values, + 'mode' : modes, + 'snapshot_name' : snapshot_names, + 'outname' : outnames + param_data['outnames'] } - filename = list(set(param_data['filenames'])) - - if len(filename) == 0: - raise RuntimeError(f"no input file found matching the expression: {input_file}") - if len(filename) > 1: - raise RuntimeError("multiple input files detected: {}".format(", ".join(filename))) - param_ind_kwargs = { - 'filename': filename[0], - 'poi_name': self.attributes['poi_names'], - 'data_name': self.attributes['data_name'], - 'config': self.attributes['config'] + 'poi_name' : self.attributes['poi_names'], + 'data_name' : self.attributes['data_name'], + 'config' : self.attributes['config'], + 'verbosity' : self.attributes['verbosity'] } self.set_param_ind_kwargs(**param_ind_kwargs) @@ -234,5 +235,4 @@ class ParameterisedLikelihood(ParameterisedRunner): outdir = self.attributes['outdir'] outname = self.attributes['outname'].format(poi_names="_".join(poi_names)) outpath = os.path.join(outdir, outname.format(poi_name=poi_name)) - with open(outpath, 'w') as outfile: - json.dump(data, outfile, indent=3) \ No newline at end of file + save_as_json(data, outpath) \ No newline at end of file diff --git a/quickstats/concurrent/parameterised_runner.py b/quickstats/concurrent/parameterised_runner.py index 824bb4408dda9fc56d1f41fe1c1d9a2a36bc1e7c..531ca677d55be85cb2b25bbf5301d4072003b870 100644 --- a/quickstats/concurrent/parameterised_runner.py +++ b/quickstats/concurrent/parameterised_runner.py @@ -1,9 +1,12 @@ from typing import Optional, List, Dict, Union, Tuple import os +import ROOT + +from quickstats import semistaticmethod from . 
import AbstractRunner from quickstats.parsers import ParamParser -from quickstats.utils.common_utils import combine_dict +from quickstats.utils.common_utils import combine_dict, batch_makedirs class ParameterisedRunner(AbstractRunner): @@ -19,28 +22,64 @@ class ParameterisedRunner(AbstractRunner): param_expr:Optional[str]=None, filter_expr:Optional[str]=None, exclude_expr:Optional[str]=None, - parallel:int=-1, timed:bool=True, + outdir:str="output", cachedir:str="cache", save_log:bool=True, cache:bool=True, - allow_none:bool=True, + allow_none:bool=True, parallel:int=-1, verbosity:Optional[Union[int, str]]="INFO"): - super().__init__(parallel=parallel, timed=timed, save_log=save_log, + super().__init__(parallel=parallel, save_log=save_log, cache=cache, verbosity=verbosity) + + self.attributes = { + 'outdir' : outdir, + 'cachedir' : cachedir, + 'verbosity': verbosity + } + self.filter_expr = filter_expr self.exclude_expr = exclude_expr self._parser = ParamParser(file_expr, param_expr, allow_none=allow_none) self._param_points = None self.param_ind_kwargs = {} self.param_dpd_kwargs = {} + + def _prerun_batch(self): + outdir = self.attributes['outdir'] + cache_dir = self.get_cache_dir() + batch_makedirs([outdir, cache_dir]) + + @semistaticmethod + def _cached_return(self, outname:str): + with open(outname, 'r') as f: + result = json.load(f) + processed_result = self._process_result(result) + return processed_result + + @semistaticmethod + def _process_result(self, result:Dict): + return result def setup_parser(self, file_expr:Optional[str]=None, param_expr:Optional[str]=None): self._parser.setup(file_expr, param_expr) + + def get_cache_dir(self): + return os.path.join(self.attributes['outdir'], self.attributes['cachedir']) def get_param_points(self, input_path:str): param_points = self.parser.get_param_points(input_path, filter_expr=self.filter_expr, exclude_expr=self.exclude_expr) return param_points + + def get_internal_param_points(self): + param_points = self.parser.get_internal_param_points(filter_expr=self.filter_expr, + exclude_expr=self.exclude_expr) + return param_points + + def get_external_param_points(self, input_path:str): + param_points = self.parser.get_external_param_points(input_path, filter_expr=self.filter_expr, + exclude_expr=self.exclude_expr) + return param_points def get_serialised_param_data(self, param_points:str, outdir:str="./", outname:str="{param_str}.json"): filenames = [] @@ -58,10 +97,9 @@ class ParameterisedRunner(AbstractRunner): parameter_list.append(parameters) outnames.append(outname) if len(outnames) != len(set(outnames)): - raise RuntimeError("output names are not distinct, please check your input. " - "(this can be due to 1. multiple tasks have the same set of " - "parameters or 2. multiple zero-parameter tasks with input files of the same" - " basename)") + raise RuntimeError("output names are not distinct, please check your input. (this can be " + "due to 1. multiple tasks have the same set of parameters or 2. 
" + "multiple zero-parameter tasks with input files of the same basename)") serialised_param_data = { 'filenames': filenames, 'outnames': outnames, @@ -89,7 +127,20 @@ class ParameterisedRunner(AbstractRunner): def run(self): kwarg_set, auxiliary_args = self.prepare_task_inputs() return self.run_batch(kwarg_set, auxiliary_args=auxiliary_args) - + def _end_of_instance_cleanup(self): - import ROOT - ROOT.gROOT.CloseFiles() \ No newline at end of file + ROOT.gROOT.CloseFiles() + + @staticmethod + def join_param_setup(base_setup:Optional[str]=None, new_setup:Optional[str]=None): + components = [] + for setup in [base_setup, new_setup]: + if not setup: + continue + if isinstance(setup, dict): + setup = ParamParser.val_encode_parameters(setup) + assert isinstance(setup, str) + components.append(setup) + if not components: + return None + return ",".join(components) \ No newline at end of file diff --git a/quickstats/concurrent/parameterised_significance.py b/quickstats/concurrent/parameterised_significance.py new file mode 100644 index 0000000000000000000000000000000000000000..c2b993a7e7cbc5598656f5b589436cd1b2c3ef03 --- /dev/null +++ b/quickstats/concurrent/parameterised_significance.py @@ -0,0 +1,167 @@ +import os +import sys +import json +from typing import Optional, Union, Dict, List, Any + +from quickstats import semistaticmethod +from quickstats.parsers import ParamParser +from quickstats.concurrent import ParameterisedRunner +from quickstats.utils.common_utils import batch_makedirs, list_of_dict_to_dict_of_list, save_as_json, combine_dict +from quickstats.maths.numerics import pretty_value +from quickstats.components import AnalysisBase, AsimovType, AsimovGenerator + +class ParameterisedSignificance(ParameterisedRunner): + + def __init__(self, input_path:str, + file_expr:Optional[str]=None, + param_expr:Optional[str]=None, + poi_name:Optional[str]=None, + filter_expr:Optional[str]=None, + exclude_expr:Optional[str]=None, + data_name:str="combData", + snapshot_name:Optional[str]=None, + mu_exp:float=0., + asimov_type:Optional[int]=None, + config:Optional[Dict]=None, + outdir:str="output", cachedir:str="cache", + outname:str="{param_names}.json", cache:bool=True, + save_log:bool=True, parallel:int=-1, + verbosity:Optional[Union[int, str]]="INFO"): + + super().__init__(file_expr=file_expr, + param_expr=param_expr, + filter_expr=filter_expr, + exclude_expr=exclude_expr, + outdir=outdir, cachedir=cachedir, + parallel=parallel, save_log=save_log, + cache=cache, verbosity=verbosity) + + self.attributes.update({ + 'input_path': input_path, + 'poi_name': poi_name, + 'data_name': data_name, + 'snapshot_name': snapshot_name, + 'mu_exp': mu_exp, + 'asimov_type': asimov_type, + 'config': config, + 'outname': outname + }) + + def _prerun_batch(self): + self.stdout.tips("When running likelihood fit on an Asimov dataset, remember to restore the global " + "observables to their Asimov values by loading the appropriate snapshot. 
" + "Asimov dataset generated on the fly (via the --asimov_type option) will automatically " + "load the internally saved snapshot so no user intervention is needed.") + super()._prerun_batch() + + @semistaticmethod + def _prerun_instance(self, filename:str, parameters:Optional[Union[float, Dict[str, float]]]=None, **kwargs): + param_str = "("+ParamParser.val_encode_parameters(parameters)+")" + self.stdout.info(f"Evaluating significance {param_str} for the workspace {filename}") + + @semistaticmethod + def _run_instance(self, filename:str, + poi_name:Optional[str]=None, + data_name:str="combData", + config:Optional[Dict]=None, + mu_exp:float=0., + asimov_type:Optional[int]=None, + snapshot_name:Optional[str]=None, + outname:Optional[str]=None, + **kwargs): + try: + if config is None: + config = {} + config.update(kwargs) + config['filename'] = filename + config['poi_name'] = poi_name + config['data_name'] = data_name + config['snapshot_name'] = snapshot_name + analysis = AnalysisBase(**config) + if asimov_type is not None: + asimov_type = AsimovType.parse(asimov_type) + asimov_data = analysis.generate_standard_asimov(asimov_type, do_import=False) + asimov_snapshot = AsimovGenerator.ASIMOV_SETTINGS[asimov_type]['asimov_snapshot'] + analysis.set_data(asimov_data) + result = analysis.nll_fit(poi_val=mu_exp, mode='hybrid', + snapshot_name=asimov_snapshot) + else: + result = analysis.nll_fit(poi_val=mu_exp, mode='hybrid') + if outname: + with open(outname, 'w') as outfile: + json.dump(result, outfile, indent=2) + return result + except Exception as e: + sys.stdout.write(f"{e}\n") + return None + + def prepare_task_inputs(self): + input_path = self.attributes['input_path'] + cache_dir = self.get_cache_dir() + outname = "{param_str}.json" + param_points = self.get_param_points(input_path) + param_data = self.get_serialised_param_data(param_points, outdir=cache_dir, outname=outname) + + configs = [] + base_config = self.attributes['config'] + if base_config is None: + base_config = {} + for param_point in param_points: + base_fix_params = base_config.get("fix_param", None) + internal_params = param_point['internal_parameters'] + fix_param_expr = self.join_param_setup(base_fix_params, internal_params) + config = combine_dict(base_config, {"fix_param": fix_param_expr}) + configs.append(config) + param_dpd_kwargs = { + 'parameters' : param_data['parameters'], # just for display + 'filename' : param_data['filenames'], + 'outname' : param_data['outnames'], + 'config' : configs + } + + param_ind_kwargs = {} + for param in ['poi_name', 'data_name', 'snapshot_name', + 'mu_exp', 'asimov_type']: + param_ind_kwargs[param] = self.attributes[param] + + self.set_param_ind_kwargs(**param_ind_kwargs) + self.set_param_dpd_kwargs(**param_dpd_kwargs) + kwarg_set = self.create_kwarg_set() + auxiliary_args = { + 'parameters': param_data['parameters'] + } + if not kwarg_set: + raise RuntimeError("no parameter point to scan for") + return kwarg_set, auxiliary_args + + def postprocess(self, raw_result, auxiliary_args:Dict): + parameters = auxiliary_args['parameters'] + data = list_of_dict_to_dict_of_list(parameters) + param_names = list(data.keys()) + data.update({ + 'nll_muexp' : [], + 'nll_muhat' : [], + 'qmu' : [], + 'muexp' : [], + 'muhat' : [], + 'significance' : [], + 'pvalue' : [], + 'status_muexp' : [], + 'status_muhat' : [], + 'status' : [] + }) + for result in raw_result: + data['nll_muexp'].append(result['cond_fit']['nll']) + data['nll_muhat'].append(result['uncond_fit']['nll']) + 
data['qmu'].append(result['qmu']) + data['muexp'].append(next(iter(result['cond_fit']['mu'].values()))) + data['muhat'].append(next(iter(result['uncond_fit']['muhat'].values()))) + data['significance'].append(result['significance']) + data['pvalue'].append(result['pvalue']) + data['status_muexp'].append(result['cond_fit']['status']) + data['status_muhat'].append(result['uncond_fit']['status']) + data['status'].append(result['status']) + outdir = self.attributes['outdir'] + outname = self.attributes['outname'].format(param_names="_".join(param_names)) + outpath = os.path.join(outdir, outname) + save_as_json(data, outpath) \ No newline at end of file diff --git a/quickstats/core/io.py b/quickstats/core/io.py index 0978f7a8b5a999abd882a483fc1e06e58e567f16..f5e9eeb881200f434bb2423a395697dc7ef89038 100644 --- a/quickstats/core/io.py +++ b/quickstats/core/io.py @@ -29,7 +29,8 @@ text_color_map = { 'bright cyan': '\033[36;1m', 'bright white': '\033[37;1m', 'darkred': '\033[91m', - 'reset': '\033[0m' + 'reset': '\033[0m', + 'okgreen': '\033[92m' } def get_colored_text(text: str, color: str) -> str: diff --git a/quickstats/core/path_manager.py b/quickstats/core/path_manager.py index 882060c55809300571551fa10198fd0ce6ba5c24..b300eeb11513d4660c92a3d94e8b22569af9f3f6 100644 --- a/quickstats/core/path_manager.py +++ b/quickstats/core/path_manager.py @@ -100,13 +100,17 @@ class PathManager: def set_file(self, file_name:str, file:Union[str, DynamicFilePath, Tuple[Optional[str], str]]): self.update_files({file_name: file}) + + def get_base_path(self): + return self.base_path - def get_directory(self, directory_name:str, check_exist:bool=False): + def get_directory(self, directory_name:str, check_exist:bool=False, **parameters): if directory_name not in self.directories: raise KeyError(f'unrecognized directory name "{directory_name}"') - if self.base_path is not None: - return os.path.join(self.base_path, self.directories[directory_name]) - directory = self.directories[directory_name] + base_path = self.get_base_path() + if base_path is not None: + return os.path.join(base_path, self.directories[directory_name]) + directory = self.directories[directory_name].format(**parameters) if check_exist: if not os.path.exists(directory): raise FileNotFoundError(f'directory "{directory}" does not exist') @@ -147,23 +151,23 @@ class PathManager: if os.path.isdir(file): raise ValueError(f'"{file}" is a directory') - def get_file(self, file_name:str, check_exist:bool=False): - return self.get_resolved_file(file_name, check_exist=check_exist) + def get_file(self, file_name:str, check_exist:bool=False, **parameters): + return self.get_resolved_file(file_name, check_exist=check_exist, **parameters) - def get_directories(self, directory_names:Optional[List[str]]=None): + def get_directories(self, directory_names:Optional[List[str]]=None, **parameters): directory_paths = {} if directory_names is None: directory_names = list(self.directories.keys()) for directory_name in directory_names: - directory_paths[directory_name] = self.get_directory(directory_name) + directory_paths[directory_name] = self.get_directory(directory_name, **parameters) return directory_paths - def get_files(self, file_names:Optional[List[str]]=None): + def get_files(self, file_names:Optional[List[str]]=None, **parameters): file_paths = {} if file_names is None: file_names = list(self.files.keys()) for file_name in file_names: - file_paths[file_name] = self.get_file(file_name) + file_paths[file_name] = self.get_file(file_name, **parameters) return 
file_paths
 
     def get_relpath(self, path:str):
@@ -171,39 +175,44 @@
             return path
         return os.path.join(self.base_path, path)
 
-    def directory_exists(self, directory_name:str):
-        directory = self.get_directory(directory_name)
+    def directory_exists(self, directory_name:str, **parameters):
+        directory = self.get_directory(directory_name, **parameters)
         return os.path.exists(directory)
 
     def file_exists(self, file_name:str, **parameters):
         file = self.get_resolved_file(file_name, **parameters)
         return os.path.exists(file)
 
-    def check_directory(self, directory_name:str):
-        self.get_directory(directory_name, check_exist=True)
+    def check_directory(self, directory_name:str, **parameters):
+        self.get_directory(directory_name, **parameters, check_exist=True)
 
     def check_file(self, file_name:str, **parameters):
         self.get_resolved_file(file_name, check_exist=True, **parameters)
 
     def makedirs(self, include_names:Optional[List[str]]=None,
-                 exclude_names:Optional[List[str]]=None):
+                 exclude_names:Optional[List[str]]=None,
+                 **parameters):
         if include_names is None:
             include_names = list(self.directories.keys())
         if exclude_names is None:
             exclude_names = []
-        directory_names = list(set(include_names) - set(exclude_names))
-        for directory_name in directory_names:
-            if directory_name not in self.directories:
-                raise KeyError(f'unrecognized directory name "{directory_name}"')
-            dirname = self.get_directory(directory_name)
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
-
-    def makedir_for_files(self, file_names:Union[str, List[str]]):
+        dirnames = list(set(include_names) - set(exclude_names))
+        resolved_dirnames = []
+        for dirname in dirnames:
+            if dirname not in self.directories:
+                raise KeyError(f'unrecognized directory name "{dirname}"')
+            resolved_dirname = self.get_directory(dirname, **parameters)
+            resolved_dirnames.append(resolved_dirname)
+        from quickstats.utils.common_utils import batch_makedirs
+        batch_makedirs(resolved_dirnames)
+
+    def makedir_for_files(self, file_names:Union[str, List[str]], **parameters):
         if isinstance(file_names, str):
             file_names = [file_names]
-        files = self.get_files(file_names)
+        files = self.get_files(file_names, **parameters)
+        resolved_dirnames = []
         for file in files.values():
             dirname = os.path.dirname(file)
-            if (dirname != "") and not os.path.exists(dirname):
-                os.makedirs(dirname)
\ No newline at end of file
+            resolved_dirnames.append(dirname)
+        from quickstats.utils.common_utils import batch_makedirs
+        batch_makedirs(resolved_dirnames)
\ No newline at end of file
diff --git a/quickstats/interface/root/ModelConfig.py b/quickstats/interface/root/ModelConfig.py
new file mode 100644
index 0000000000000000000000000000000000000000..8584c3ec1e8792c3c61b2878f0dc423d109813a6
--- /dev/null
+++ b/quickstats/interface/root/ModelConfig.py
@@ -0,0 +1,83 @@
+from typing import Dict, Union, List, Optional, Tuple
+import os
+
+import numpy as np
+
+from quickstats import semistaticmethod, AbstractObject
+from quickstats import GeneralEnum
+from .RooArgSet import RooArgSet
+
+class ObjectType(GeneralEnum):
+
+    PDF = (0, "pdf", "pdf")
+    OBS = (1, "observables", "observable")
+    POIS = (2, "parameters of interest", "parameter of interest")
+    NUIS = (3, "nuisance parameters", "nuisance parameter")
+    GLOBS = (4, "global observables", "global observable")
+
+    def __new__(cls, value:int, plural_str:str, singular_str:str):
+        obj = object.__new__(cls)
+        obj._value_ = value
+        obj.plural_str = plural_str
+        obj.singular_str = singular_str
+        return obj
+
+class ModelConfig(AbstractObject):
+
+    @semistaticmethod
+    def sanity_check(self, mc:"ROOT.RooStats.ModelConfig"):
+        pass_check = True
+        def check_objects(objects, object_type:ObjectType,
+                          require_exist:bool=True,
+                          require_class:Optional[str]=None,
+                          require_dependence:Optional["ROOT.RooAbsArg"]=None,
+                          dep_object_type:ObjectType=ObjectType.PDF):
+            if require_exist and (not objects):
+                self.stdout.error(f'{object_type.plural_str} not defined in model config')
+                return False
+            if require_class is not None:
+                assert objects.InheritsFrom("RooArgSet")
+                invalid_objects = RooArgSet.exclude_by_class(objects, require_class)
+                if invalid_objects:
+                    for invalid_object in invalid_objects:
+                        classname = invalid_object.ClassName()
+                        name = invalid_object.GetName()
+                        self.stdout.error(f'{object_type.singular_str} "{name}" is an instance of '
+                                          f'{classname} but not {require_class}')
+                    return False
+            if require_dependence is not None:
+                assert objects.InheritsFrom("RooArgSet")
+                assert require_dependence.InheritsFrom("RooAbsArg")
+                dependent_objects = RooArgSet.select_dependent_parameters(objects, require_dependence)
+                invalid_objects = objects.Clone()
+                invalid_objects.remove(dependent_objects)
+                if invalid_objects:
+                    for invalid_object in invalid_objects:
+                        classname = invalid_object.ClassName()
+                        name = invalid_object.GetName()
+                        self.stdout.error(f'{dep_object_type.singular_str} does not depend on '
+                                          f'{object_type.singular_str} "{name}"')
+                    return False
+            return True
+        pdf = mc.GetPdf()
+        # check pdf
+        pass_check &= check_objects(pdf, ObjectType.PDF)
+        # skip subsequent checks if pdf does not exist
+        if not pdf:
+            return False
+        # check observables
+        pass_check &= check_objects(mc.GetObservables(), ObjectType.OBS)
+        # check parameters of interest
+        pass_check &= check_objects(mc.GetParametersOfInterest(), ObjectType.POIS,
+                                    require_class="RooRealVar", require_dependence=pdf)
+        # check nuisance parameters
+        pass_check &= check_objects(mc.GetNuisanceParameters(), ObjectType.NUIS,
+                                    require_class="RooRealVar", require_dependence=pdf)
+        # check global observables
+        pass_check &= check_objects(mc.GetGlobalObservables(), ObjectType.GLOBS,
+                                    require_class="RooRealVar", require_dependence=pdf)
+
+        # check factorize pdf (needed?)
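+        # the per-object checks are AND-ed into pass_check so that every
+        # problem in the model config is reported, not just the first one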
+ + return pass_check + \ No newline at end of file diff --git a/quickstats/interface/root/RooAbsPdf.py b/quickstats/interface/root/RooAbsPdf.py index c2c562da3bc9d0578c9cbd2228d7e2ff3bfe8ec5..ef4ca363ae6f56d92756112e1c0b4fae7d836802 100644 --- a/quickstats/interface/root/RooAbsPdf.py +++ b/quickstats/interface/root/RooAbsPdf.py @@ -7,6 +7,8 @@ import numpy as np from quickstats import semistaticmethod, AbstractObject from quickstats.interface.cppyy.vectorize import as_np_array, as_vector, np_type_str_maps from .TH1 import TH1 +from .TH2 import TH2 +from .TH3 import TH3 from .TArrayData import TArrayData class RooAbsPdf(AbstractObject): @@ -204,6 +206,9 @@ class RooAbsPdf(AbstractObject): if channel_cat not in pdf_observables: pdf_observables.add(channel_cat) from quickstats.interface.root import RooDataSet + # reset initial values of observables + for obs in pdf_observables: + obs.setBin(0) c_dataset_map = RooDataSet.get_dataset_map(dataset_map) asimov_data = ROOT.RooDataSet(dataset_name, dataset_name, pdf_observables, @@ -212,8 +217,50 @@ class RooAbsPdf(AbstractObject): ROOT.RooFit.WeightVar(weight_var)) return asimov_data + # has precision issue since the histograms are always of type "float" + @semistaticmethod + def _get_histo_values_old(self, pdf:"ROOT.RooAbsPdf", + obs_x:"ROOT.RooRealVar", + obs_y:Optional["ROOT.RooRealVar"]=None, + obs_z:Optional["ROOT.RooRealVar"]=None): + import ROOT + var_y = ROOT.RooFit.YVar(obs_y) if obs_y is not None else ROOT.RooCmdArg.none() + var_z = ROOT.RooFit.ZVar(obs_z) if obs_z is not None else ROOT.RooCmdArg.none() + rhist = pdf.createHistogram(f'{uuid.uuid4().hex}', obs_x, var_y, var_z) + ndim = rhist.GetDimension() + if ndim == 1: + result = TH1.GetBinContentArray(rhist).flatten() + elif ndim == 2: + result = TH2.GetBinContentArray(rhist).flatten() + elif ndim == 3: + result = TH3.GetBinContentArray(rhist).flatten() + else: + raise RuntimeError('histogram dimension must be 1, 2 or 3') + rhist.Delete() + return result + + @semistaticmethod + def _get_histo_values(self, pdf:"ROOT.RooAbsPdf", observables:"ROOT.RooRealVar"): + rhist = self.create_histogram(f'{uuid.uuid4().hex}', observables, 'double') + if pdf.canBeExtended(): + scale_factor = pdf.expectedEvents(observables) + else: + scale_factor = 1.0 + pdf.fillHistogram(rhist, observables, scale_factor, 0, False) + ndim = rhist.GetDimension() + if ndim == 1: + result = TH1.GetBinContentArray(rhist).flatten() + elif ndim == 2: + result = TH2.GetBinContentArray(rhist).flatten() + elif ndim == 3: + result = TH3.GetBinContentArray(rhist).flatten() + else: + raise RuntimeError('histogram dimension must be 1, 2 or 3') + rhist.Delete() + return result + @semistaticmethod - def get_asimov_dataset(self, pdf:"RooAbsPdf", + def get_asimov_dataset(self, pdf:"ROOT.RooAbsPdf", observables:"ROOT.RooArgSet", weight_name:Optional[str]="weight", dataset_name:Optional[str]="asimovData", @@ -243,9 +290,14 @@ class RooAbsPdf(AbstractObject): np_ds = {} for i, obs_name in enumerate(bin_centers): np_ds[obs_name] = bin_center_combination[:, i] - ds = RooDataSet.from_numpy(np_ds, pdf_observables) - pdf_values = ROOT.RFUtils.GetPdfValuesAcrossObsDataset(pdf, ds, True) - pdf_values = as_np_array(pdf_values) + nobs = len(bin_centers) + # use histogram method for low dimensions + if nobs in [1, 2, 3]: + pdf_values = self._get_histo_values(pdf, pdf_observables) + else: + ds = RooDataSet.from_numpy(np_ds, pdf_observables) + pdf_values = ROOT.RFUtils.GetPdfValuesAcrossObsDataset(pdf, ds, True) + pdf_values = 
as_np_array(pdf_values) # multiply by bin width(s) num_events = pdf_values * np.prod(bin_width_combination, axis=1) np_ds[weight_name] = num_events @@ -320,3 +372,62 @@ class RooAbsPdf(AbstractObject): pdf_title = pdf.GetTitle() new_pdf = ROOT.RooProdPdf(pdf_name, pdf_title, base_components) return new_pdf + + @semistaticmethod + def create_histogram(self, name:str, observables:"ROOT.RooArgSet", dtype:str="double"): + ndim = len(observables) + if (ndim < 1) or (ndim > 3): + raise ValueError('dimension not supported') + binnings = [observable.getBinning() for observable in observables] + if dtype not in ["float", "double"]: + raise RuntimeError('dtype must be "float" or "double"') + import ROOT + if ndim == 1: + cls = ROOT.TH1D if dtype == "double" else ROOT.TH1F + if binnings[0].isUniform(): + histogram = cls(name, name, + binnings[0].numBins(), + binnings[0].lowBound(), + binnings[0].highBound()) + else: + histogram = cls(name, name, + binnings[0].numBins(), + binnings[0].array()) + elif ndim == 2: + cls = ROOT.TH2D if dtype == "double" else ROOT.TH2F + if (binnings[0].isUniform() and binnings[1].isUniform()): + histogram = cls(name, name, + binnings[0].numBins(), + binnings[0].lowBound(), + binnings[0].highBound(), + binnings[1].numBins(), + binnings[1].lowBound(), + binnings[1].highBound()) + else: + histogram = cls(name, name, + binnings[0].numBins(), + binnings[0].array(), + binnings[1].numBins(), + binnings[1].array()) + else: + cls = ROOT.TH3D if dtype == "double" else ROOT.TH3F + if (binnings[0].isUniform() and binnings[1].isUniform() and binnings[2].isUniform()): + histogram = cls(name, name, + binnings[0].numBins(), + binnings[0].lowBound(), + binnings[0].highBound(), + binnings[1].numBins(), + binnings[1].lowBound(), + binnings[1].highBound(), + binnings[2].numBins(), + binnings[2].lowBound(), + binnings[2].highBound()) + else: + histogram = cls(name, name, + binnings[0].numBins(), + binnings[0].array(), + binnings[1].numBins(), + binnings[1].array(), + binnings[2].numBins(), + binnings[2].array()) + return histogram diff --git a/quickstats/interface/root/RooArgSet.py b/quickstats/interface/root/RooArgSet.py index e29fdf2d058f2ccd0bf70a049feca6db90e14b64..6f86bbc23f71e9566ad60fb358c4e5a0032dec41 100644 --- a/quickstats/interface/root/RooArgSet.py +++ b/quickstats/interface/root/RooArgSet.py @@ -2,8 +2,10 @@ from typing import Dict, Union, List, Optional import numpy as np +import cppyy + from quickstats import semistaticmethod -from quickstats.interface.cppyy.vectorize import as_np_array +from quickstats.interface.cppyy.vectorize import as_vector class RooArgSet: @@ -15,4 +17,56 @@ class RooArgSet: @staticmethod def sort(argset:"ROOT.RooArgSet"): argset.sort() - return argset \ No newline at end of file + return argset + + def get_boundary_parameters(argset:"ROOT.RooArgSet"): + return cppyy.gbl.RFUtils.GetBoundaryParameters(argset) + + def select_by_class(argset:"ROOT.RooArgSet", classname:str): + return cppyy.gbl.RFUtils.SelectByClass(argset, classname) + + def exclude_by_class(argset:"ROOT.RooArgSet", classname:str): + return cppyy.gbl.RFUtils.ExcludeByClass(argset, classname) + + def select_dependent_parameters(argset:"ROOT.RooArgSet", source:"ROOT.RooAbsArg"): + return cppyy.gbl.RFUtils.SelectDependentParameters(argset, source) + + def get_set_difference(argset1:"ROOT.RooArgSet", argset2:"ROOT.RooArgSet"): + return cppyy.gbl.RFUtils.GetRooArgSetDifference(argset1, argset2) + + def set_constant_state(argset:"ROOT.RooArgSet", value:bool=True): + argset.setAttribAll('Constant', 
value)
+
+    def select_by_constant_state(argset:"ROOT.RooArgSet", value:bool=True):
+        return argset.selectByAttrib('Constant', value)
+
+    def get_parameters_close_to_min(argset:"ROOT.RooArgSet", threshold:float=0.1):
+        return cppyy.gbl.RFUtils.GetParametersCloseToMin(argset, threshold)
+
+    def get_parameters_close_to_max(argset:"ROOT.RooArgSet", threshold:float=0.1):
+        return cppyy.gbl.RFUtils.GetParametersCloseToMax(argset, threshold)
+
+    def get_parameters_close_to_boundary(argset:"ROOT.RooArgSet", threshold:float=0.1):
+        return cppyy.gbl.RFUtils.GetParametersCloseToBoundary(argset, threshold)
+
+    def expand_parameters_range(argset:"ROOT.RooArgSet", threshold:float=0.1,
+                                expand_min:bool=True, expand_max:bool=True,
+                                orig_argset_at_min:Optional["ROOT.RooArgSet"]=None,
+                                new_argset_at_min:Optional["ROOT.RooArgSet"]=None,
+                                orig_argset_at_max:Optional["ROOT.RooArgSet"]=None,
+                                new_argset_at_max:Optional["ROOT.RooArgSet"]=None):
+        orig_argset_at_min = 0 if orig_argset_at_min is None else orig_argset_at_min
+        new_argset_at_min = 0 if new_argset_at_min is None else new_argset_at_min
+        orig_argset_at_max = 0 if orig_argset_at_max is None else orig_argset_at_max
+        new_argset_at_max = 0 if new_argset_at_max is None else new_argset_at_max
+        return cppyy.gbl.RFUtils.ExpandParametersRange(argset, threshold,
+                                                       expand_min, expand_max,
+                                                       orig_argset_at_min,
+                                                       new_argset_at_min,
+                                                       orig_argset_at_max,
+                                                       new_argset_at_max)
+
+    def set_category_indices(argset:"ROOT.RooArgSet", indices:np.ndarray):
+        return cppyy.gbl.RFUtils.SetCategoryIndices(argset, as_vector(indices))
+
+    def save_data_as_txt(argset:"ROOT.RooArgSet", filename:str, precision:int=7):
+        return cppyy.gbl.RFUtils.SaveRooArgSetDataAsTxt(argset, filename, precision)
\ No newline at end of file
diff --git a/quickstats/interface/root/RooRealVar.py b/quickstats/interface/root/RooRealVar.py
index 672c5dc4a1825566d9a749ba81c2649440ad8e9c..daae909dd3f155c6cc71060311ca45e0be7870ad 100644
--- a/quickstats/interface/root/RooRealVar.py
+++ b/quickstats/interface/root/RooRealVar.py
@@ -5,6 +5,8 @@ from quickstats import semistaticmethod
 from quickstats.maths.numerics import get_proper_ranges
 from quickstats.interface.root import TArrayData
 
+import cppyy
+
 class RooRealVar:
     
     @property
@@ -158,4 +160,20 @@ class RooRealVar:
             'bin_center': bin_edge_to_bin_center(bin_low_edge),
             'bin_width': bin_edge_to_bin_width(bin_low_edge)
         }
-        return result
\ No newline at end of file
+        return result
+
+    @staticmethod
+    def at_boundary(obj:"ROOT.RooRealVar"):
+        return cppyy.gbl.RFUtils.ParameterAtBoundary(obj)
+
+    @staticmethod
+    def close_to_min(obj:"ROOT.RooRealVar", threshold:float=0.1):
+        return cppyy.gbl.RFUtils.ParameterCloseToMin(obj, threshold)
+
+    @staticmethod
+    def close_to_max(obj:"ROOT.RooRealVar", threshold:float=0.1):
+        return cppyy.gbl.RFUtils.ParameterCloseToMax(obj, threshold)
+
+    @staticmethod
+    def close_to_boundary(obj:"ROOT.RooRealVar", threshold:float=0.1):
+        return cppyy.gbl.RFUtils.ParameterCloseToBoundary(obj, threshold)
\ No newline at end of file
diff --git a/quickstats/interface/root/TH2.py b/quickstats/interface/root/TH2.py
index e74e71e8945090d80f7671333e75520f60dc3da6..391e4f5fd832c802c31a526daf00cea3bfcdfa3b 100644
--- a/quickstats/interface/root/TH2.py
+++ b/quickstats/interface/root/TH2.py
@@ -27,12 +27,12 @@ class TH2(TObject):
     
     def get_fundamental_type(self):
         import ROOT
-        raise ROOT.TH2
+        return ROOT.TH2
     
     def init(self, h):
         self.bin_content = self.GetBinContentArray(h, self.dtype, self.underflow_bin, self.overflow_bin)
-        self.x_labels =
self.GetXLabelArray(h) - self.y_labels = self.GetYLabelArray(h) + #self.x_labels = self.GetXLabelArray(h) + #self.y_labels = self.GetYLabelArray(h) self.x_bin_center = self.GetXBinCenterArray(h, self.dtype, self.underflow_bin, self.overflow_bin) self.y_bin_center = self.GetYBinCenterArray(h, self.dtype, self.underflow_bin, self.overflow_bin) self.x_bin_width = self.GetXBinWidthArray(h, self.dtype, self.underflow_bin, self.overflow_bin) @@ -47,7 +47,8 @@ class TH2(TObject): n_bins_y = h.GetNbinsY() # account for underflow and overflow bins size = (n_bins_x + 2) * (n_bins_y + 2) - np_arr = c_array_to_np_array(arr, size=size, shape=(n_bins_x + 2, n_bins_y + 2)) + np_arr = c_array_to_np_array(arr, size=size) + np_arr = np_arr.reshape((n_bins_x + 2, n_bins_y + 2), order='F') x_start = 1 if not underflow_bin else 0 y_start = x_start x_end = -1 if not overflow_bin else n_bins_x + 2 diff --git a/quickstats/interface/root/TH3.py b/quickstats/interface/root/TH3.py new file mode 100644 index 0000000000000000000000000000000000000000..fc7d9ad0d8cd2e51ea091cdff2ffb9893738325f --- /dev/null +++ b/quickstats/interface/root/TH3.py @@ -0,0 +1,127 @@ +import array +import numpy as np + +from quickstats import semistaticmethod +from quickstats.interface.root import TObject, TArrayData +from quickstats.interface.cppyy.vectorize import c_array_to_np_array + +class TH3(TObject): + + DTYPE_MAP = { + "TH3I": "int", + "TH3F": "float", + "TH3D": "double" + } + + def __init__(self, h:"ROOT.TH3", underflow_bin:bool=False, overflow_bin:bool=False): + import ROOT + dtype_map = { + ROOT.TH3I: "int", + ROOT.TH3F: "float", + ROOT.TH3D: "double" + } + self.dtype = dtype_map.get(type(h), "double") + self.underflow_bin = underflow_bin + self.overflow_bin = overflow_bin + self.init(h) + + def get_fundamental_type(self): + import ROOT + return ROOT.TH3 + + def init(self, h): + self.bin_content = self.GetBinContentArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.x_bin_center = self.GetXBinCenterArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.y_bin_center = self.GetYBinCenterArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.z_bin_center = self.GetZBinCenterArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.x_bin_width = self.GetXBinWidthArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.y_bin_width = self.GetYBinWidthArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.z_bin_width = self.GetZBinWidthArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.x_bin_low_edge = self.GetXBinLowEdgeArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.y_bin_low_edge = self.GetYBinLowEdgeArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + self.z_bin_low_edge = self.GetZBinLowEdgeArray(h, self.dtype, self.underflow_bin, self.overflow_bin) + + @staticmethod + def GetBinContentArray(h, dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False): + arr = h.GetArray() + n_bins_x = h.GetNbinsX() + n_bins_y = h.GetNbinsY() + n_bins_z = h.GetNbinsZ() + # account for underflow and overflow bins + size = (n_bins_x + 2) * (n_bins_y + 2) * (n_bins_z + 2) + np_arr = c_array_to_np_array(arr, size=size) + np_arr = np_arr.reshape((n_bins_x + 2, n_bins_y + 2, n_bins_z + 2), order='F') + x_start = 1 if not underflow_bin else 0 + y_start = x_start + z_start = x_start + x_end = -1 if not overflow_bin else n_bins_x + 2 + y_end = -1 if not overflow_bin else n_bins_y + 2 + z_end = -1 if not overflow_bin else n_bins_z + 
2
+        np_arr = np_arr[x_start:x_end, y_start:y_end, z_start:z_end]
+        return np_arr
+
+    @staticmethod
+    def GetXLabelArray(h:"ROOT.TH3"):
+        return np.array(h.GetXaxis().GetLabels(), dtype=str)
+
+    @staticmethod
+    def GetYLabelArray(h:"ROOT.TH3"):
+        return np.array(h.GetYaxis().GetLabels(), dtype=str)
+
+    @staticmethod
+    def GetZLabelArray(h:"ROOT.TH3"):
+        return np.array(h.GetZaxis().GetLabels(), dtype=str)
+
+    @staticmethod
+    def GetAxisBinCenterArray(ax:"ROOT.TAxis", dtype:str='double', underflow_bin:int=0, overflow_bin:int=0):
+        import ROOT
+        c_vector = ROOT.TAxisUtils.GetBinCenterArray[dtype](ax, underflow_bin, overflow_bin)
+        return TArrayData.vec_to_array(c_vector)
+
+    @staticmethod
+    def GetAxisBinWidthArray(ax:"ROOT.TAxis", dtype:str='double', underflow_bin:int=0, overflow_bin:int=0):
+        import ROOT
+        c_vector = ROOT.TAxisUtils.GetBinWidthArray[dtype](ax, underflow_bin, overflow_bin)
+        return TArrayData.vec_to_array(c_vector)
+
+    @staticmethod
+    def GetAxisBinLowEdgeArray(ax:"ROOT.TAxis", dtype:str='double', underflow_bin:int=0, overflow_bin:int=0):
+        import ROOT
+        c_vector = ROOT.TAxisUtils.GetBinLowEdgeArray[dtype](ax, underflow_bin, overflow_bin)
+        return TArrayData.vec_to_array(c_vector)
+
+    @staticmethod
+    def GetXBinCenterArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False):
+        return TH3.GetAxisBinCenterArray(h.GetXaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetYBinCenterArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False):
+        return TH3.GetAxisBinCenterArray(h.GetYaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetZBinCenterArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False):
+        return TH3.GetAxisBinCenterArray(h.GetZaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetXBinWidthArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False):
+        return TH3.GetAxisBinWidthArray(h.GetXaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetYBinWidthArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False):
+        return TH3.GetAxisBinWidthArray(h.GetYaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetZBinWidthArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:bool=False, overflow_bin:bool=False):
+        return TH3.GetAxisBinWidthArray(h.GetZaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetXBinLowEdgeArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:int=0, overflow_bin:int=0):
+        return TH3.GetAxisBinLowEdgeArray(h.GetXaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetYBinLowEdgeArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:int=0, overflow_bin:int=0):
+        return TH3.GetAxisBinLowEdgeArray(h.GetYaxis(), dtype, underflow_bin, overflow_bin)
+
+    @staticmethod
+    def GetZBinLowEdgeArray(h:"ROOT.TH3", dtype:str='double', underflow_bin:int=0, overflow_bin:int=0):
+        return TH3.GetAxisBinLowEdgeArray(h.GetZaxis(), dtype, underflow_bin, overflow_bin)
\ No newline at end of file
diff --git a/quickstats/interface/root/__init__.py b/quickstats/interface/root/__init__.py
index b1c921e16e6590ca5d2b8501c8eef60b809cacab..413ffcd9398f76edfab0ccfd80bc0200e55ff657 100644
--- a/quickstats/interface/root/__init__.py
+++ b/quickstats/interface/root/__init__.py
@@ -15,6 +15,7 @@ from quickstats.interface.root.RooCategory import RooCategory
 from quickstats.interface.root.RooAbsPdf import RooAbsPdf
diff --git a/quickstats/interface/root/__init__.py b/quickstats/interface/root/__init__.py
index b1c921e16e6590ca5d2b8501c8eef60b809cacab..413ffcd9398f76edfab0ccfd80bc0200e55ff657 100644
--- a/quickstats/interface/root/__init__.py
+++ b/quickstats/interface/root/__init__.py
@@ -15,6 +15,7 @@
 from quickstats.interface.root.RooCategory import RooCategory
 from quickstats.interface.root.RooAbsPdf import RooAbsPdf
 from quickstats.interface.root.RooArgSet import RooArgSet
 from quickstats.interface.root.RooMsgService import RooMsgService
+from quickstats.interface.root.ModelConfig import ModelConfig
 
 load_macros()
diff --git a/quickstats/interface/root/macros.py b/quickstats/interface/root/macros.py
index 1a84304b64afb1d362d5e3500452d2f4eb5eff80..999b3b76ae270e84ff1e2bb7a714ad5757902f92 100644
--- a/quickstats/interface/root/macros.py
+++ b/quickstats/interface/root/macros.py
@@ -197,10 +197,12 @@ ROOT_MACROS = \
     "RFUtils": """
     #include <iostream>
-    #include <chrono>
-    using std::chrono::high_resolution_clock;
-    using std::chrono::duration;
-    using std::chrono::milliseconds;
+    #include <fstream>
+    #include <iomanip>
+    //#include <chrono>
+    //using std::chrono::high_resolution_clock;
+    //using std::chrono::duration;
+    //using std::chrono::milliseconds;
     
     namespace RFUtils{
         struct DatasetStruct {
@@ -413,16 +415,22 @@ ROOT_MACROS = \
         const size_t numEntries = dataset->numEntries();
         std::vector<double> values(numEntries);
         RooArgSet* pdfObs = pdf->getObservables(dataset->get());
-        double expected_events = 1.;
+        for (auto const& obs: *pdfObs)
+            obs->recursiveRedirectServers(*pdfObs, false, false, false);
+        RooArgSet projectedVars;
+        RooArgSet* cloneSet = nullptr;
+        const RooAbsReal *projected = pdf->createPlotProjection(*pdfObs, projectedVars, cloneSet);
+        double scale_factor = 1.;
         if (normalize)
-            expected_events = pdf->expectedEvents(*pdfObs);
+            scale_factor = pdf->expectedEvents(*pdfObs);
         for (size_t i = 0; i < numEntries; i++){
             // faster than
             // pdfObs->assignFast(*dataset->get(i));
             for (auto const& x : *dataset->get(i))
                 ((RooRealVar*)(pdfObs->find(x->GetName())))->setVal(((RooRealVar*)x)->getVal());
-            values[i] = pdf->getVal(*pdfObs) * expected_events;
+            values[i] = projected->getVal() * scale_factor;
         }
+        delete cloneSet;
         return values;
     }
     
@@ -460,15 +468,163 @@ ROOT_MACROS = \
         }
     }
     
-    RooArgSet SelectConstantArgs(RooArgSet* args, bool isConstant=true){
+    RooArgSet GetConstantParameters(RooArgSet* args, bool isConstant=true){
         RooArgSet result;
-        const size_t n_args = args->size();
-        for (size_t i = 0; i < n_args; i++)
-            if (((RooRealVar*)(*args)[i])->isConstant() == isConstant)
-                result.add(*((*args)[i]));
+        for (auto const *arg: *args)
+            if (((RooRealVar*)arg)->isConstant() == isConstant)
+                result.add(*arg);
         return result;
     }
     
+    bool ParameterCloseToMin(RooRealVar* param, float threshold=0.1){
+        if ((!param->hasMin()) || (!param->hasMax()))
+            return false;
+        return param->getVal() < ((1. - threshold) * param->getMin() + threshold * param->getMax());
+    }
+    
+    bool ParameterCloseToMax(RooRealVar* param, float threshold=0.1){
+        if ((!param->hasMin()) || (!param->hasMax()))
+            return false;
+        return param->getVal() > ((1. - threshold) * param->getMax() + threshold * param->getMin());
+    }
+    
+    bool ParameterCloseToBoundary(RooRealVar* param, float threshold=0.1){
+        return ParameterCloseToMin(param, threshold) || ParameterCloseToMax(param, threshold);
+    }
+    
+    RooArgSet GetParametersCloseToMin(RooArgSet* args, float threshold=0.1){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (ParameterCloseToMin((RooRealVar*)arg, threshold))
+                result.add(*arg);
+        return result;
+    }
+    
+    RooArgSet GetParametersCloseToMax(RooArgSet* args, float threshold=0.1){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (ParameterCloseToMax((RooRealVar*)arg, threshold))
+                result.add(*arg);
+        return result;
+    }
+    
+    RooArgSet GetParametersCloseToBoundary(RooArgSet* args, float threshold=0.1){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (ParameterCloseToBoundary((RooRealVar*)arg, threshold))
+                result.add(*arg);
+        return result;
+    }
+    
+    void ExpandParametersRange(RooArgSet* args, float threshold=0.1,
+                               bool expand_min=true, bool expand_max=true,
+                               RooArgSet* outOrigArgsAtMin=nullptr,
+                               RooArgSet* outNewArgsAtMin=nullptr,
+                               RooArgSet* outOrigArgsAtMax=nullptr,
+                               RooArgSet* outNewArgsAtMax=nullptr){
+        for (auto *arg: *args){
+            RooRealVar* param = dynamic_cast<RooRealVar*>(arg);
+            if (!param)
+                continue;
+            if (expand_min && ParameterCloseToMin(param, threshold)){
+                const double val = param->getVal();
+                if (outOrigArgsAtMin != nullptr)
+                    outOrigArgsAtMin->add(*(RooRealVar*)param->Clone());
+                param->setMin(val - (param->getMax() - val));
+                if (outNewArgsAtMin != nullptr)
+                    outNewArgsAtMin->add(*(RooRealVar*)param->Clone());
+            }
+            else if (expand_max && ParameterCloseToMax(param, threshold)){
+                const double val = param->getVal();
+                if (outOrigArgsAtMax != nullptr)
+                    outOrigArgsAtMax->add(*(RooRealVar*)param->Clone());
+                param->setMax(val + (val - param->getMin()));
+                if (outNewArgsAtMax != nullptr)
+                    outNewArgsAtMax->add(*(RooRealVar*)param->Clone());
+            }
+        }
+    }
+    
+    bool ParameterAtBoundary(RooRealVar* param, float nsigma=1.0){
+        const double value = param->getVal();
+        return ((value - param->getMin()) < nsigma * -1. * param->getErrorLo()) ||
+               ((param->getMax() - value) < nsigma * param->getErrorHi());
+    }
+    
+    RooArgSet GetBoundaryParameters(RooArgSet* args){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (ParameterAtBoundary((RooRealVar*)arg))
+                result.add(*arg);
+        return result;
+    }
+    
+    RooArgSet SelectByClass(RooArgSet* args, const char* classname){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (arg->InheritsFrom(classname))
+                result.add(*arg);
+        return result;
+    }
+    
+    RooArgSet ExcludeByClass(RooArgSet* args, const char* classname){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (!arg->InheritsFrom(classname))
+                result.add(*arg);
+        return result;
+    }
+    
+    RooArgSet SelectDependentParameters(RooArgSet* args, RooAbsArg* source){
+        RooArgSet result;
+        for (auto const *arg: *args)
+            if (source->dependsOn(*arg))
+                result.add(*arg);
+        return result;
+    }
+    
+    RooArgSet GetRooArgSetDifference(RooArgSet* args1, RooArgSet* args2){
+        RooArgSet result;
+        for (auto const *arg: *args1)
+            if (!args2->find(*arg))
+                result.add(*arg);
+        return result;
+    }
+    
+    template<typename T>
+    int SetCategoryIndices(RooArgList* cats, std::vector<T> *indices){
+        int changedIndex = -1;
+        if (cats->size() != indices->size())
+            throw std::runtime_error("category index size mismatch");
+        for (size_t i = 0; i < indices->size(); ++i){
+            RooCategory* cat = dynamic_cast<RooCategory*>(cats->at(i));
+            if (!cat)
+                throw std::runtime_error("encountered non-RooCategory instance");
+            if (cat->getIndex() != indices->at(i)){
+                changedIndex = i;
+                cat->setIndex(indices->at(i));
+            }
+        }
+        return changedIndex;
+    }
+    
+    void SaveRooArgSetDataAsTxt(const RooArgSet *args, const std::string &filename, size_t precision=8){
+        std::ofstream outfile(filename, std::ofstream::trunc);
+        for (auto arg: *args){
+            RooRealVar* v = dynamic_cast<RooRealVar*>(arg);
+            if (v)
+                outfile << v->GetName() <<" "<<std::fixed<<std::setprecision(precision)<<v->getVal()<<" "<<v->isConstant()<<" "<<
+                           std::fixed<<std::setprecision(precision)<<v->getMin()<<" "<<
+                           std::fixed<<std::setprecision(precision)<<v->getMax()<<std::endl;
+            else{
+                RooCategory* cat = dynamic_cast<RooCategory*>(arg);
+                if (cat)
+                    outfile<<cat->GetName()<<" "<<cat->getCurrentIndex()<<" "<<cat->isConstant()<<std::endl;
+            }
+        }
+        outfile.close();
+    }
+    
     #if ROOT_VERSION_CODE >= ROOT_VERSION(6,26,0)
     #endif
     };
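The new boundary helpers can be exercised from Python once the macros are compiled; importing quickstats.interface.root triggers load_macros(), as the __init__.py hunk above shows. A hedged sketch (the threshold semantics follow ParameterCloseToMax):

import ROOT
import quickstats.interface.root   # side effect: loads the RFUtils macros

w = ROOT.RooWorkspace("w")
w.factory("x[0.95, 0, 1]")         # within 10% of its upper bound
w.factory("y[0.50, 0, 1]")         # comfortably inside its range
params = ROOT.RooArgSet(w.var("x"), w.var("y"))

near_max = ROOT.RFUtils.GetParametersCloseToMax(params, 0.1)
near_max.Print()                   # expected to list x only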
diff --git a/quickstats/maths/interpolation.py b/quickstats/maths/interpolation.py
index 2a9cbac9a0f7ff2806f0727390c33dffbd29cb89..c3552766b55217143ef66f70d97a0cae703081aa 100644
--- a/quickstats/maths/interpolation.py
+++ b/quickstats/maths/interpolation.py
@@ -54,6 +54,33 @@ def get_x_intersections(x1, y1, x2, y2):
     intersections = [x2[i] - (x2[i + 1] - x2[i])/(diff[i + 1] - diff[i]) * diff[i] for i in idx]
     return intersections
 
+def get_intervals_between_curves(x1, y1, x2, y2):
+    """Get the x interval(s) bounded by the intersections of two curves
+    """
+    interp_y1 = np.interp(x2, x1, y1)
+    
+    diff = interp_y1 - y2
+    sign_change = np.diff(np.sign(diff))
+    # determines what index intersection points are at
+    idx = np.argwhere(sign_change).flatten()
+    #linear interpolation to get exact intercepts: x = x1 + (x2-x1)/(y2-y1) * (y-y1)
+    #y = 0 -> x = x1 - (x2-x1)/(y2-y1) * y1
+    intersections = np.array([x2[i] - (x2[i + 1] - x2[i])/(diff[i + 1] - diff[i]) * diff[i] for i in idx])
+    # no intersection
+    if len(intersections) == 0:
+        return intersections
+    # one-sided interval
+    elif len(intersections) == 1:
+        sign = sign_change[idx[0]]
+        if sign < 0:
+            return np.array([-np.inf, intersections[0]])
+        return np.array([intersections[0], np.inf])
+    elif len(intersections) == 2:
+        if (sign_change[idx[0]] + sign_change[idx[1]]) != 0:
+            raise RuntimeError('found discontinuous curves')
+        return intersections
+    raise RuntimeError('found multiple intervals')
+
 def interpolate_2d(x:np.ndarray, y:np.ndarray, z:np.ndarray, method:str='cubic', n:int=500):
     from scipy import interpolate
     mask = ~np.isnan(z)
diff --git a/quickstats/parsers/param_parser.py b/quickstats/parsers/param_parser.py
index f206b8bb67e251de78837e6c262fc428700f2ec2..66311d7788e1baffdface960ac794ff774deb15e 100644
--- a/quickstats/parsers/param_parser.py
+++ b/quickstats/parsers/param_parser.py
@@ -6,6 +6,7 @@ import glob
 import fnmatch
 import itertools
 from functools import partial
+from collections import ChainMap
 
 import numpy as np
 
@@ -132,7 +133,9 @@ class ParamParser:
         return regex
     
     @staticmethod
-    def sort_param_points(param_points:List, attributes:List):
+    def sort_param_points(param_points:List, attributes:Optional[List]=None):
+        if attributes is None:
+            attributes = list(param_points[0]['parameters'])
         key = lambda d: tuple(d['parameters'][attrib] for attrib in attributes)
         return sorted(param_points, key=key)
 
@@ -176,13 +179,14 @@
         if param_str is None:
             return {}
         param_points = []
-        idp_param_strs = split_str(param_str, sep=';', remove_empty=True)
+        # components separated by ; are subject to independent conditions
+        ind_components = split_str(param_str, sep=';', remove_empty=True)
         all_combinations = []
-        for idp_param_str in idp_param_strs:
+        for component in ind_components:
             param_values = {}
-            param_expr_list = split_str(idp_param_str, sep=',', remove_empty=True)
+            param_expr_list = split_str(component, sep=',', remove_empty=True, use_paranthesis=True)
             for expr in param_expr_list:
-                tokens = expr.split('=')
+                tokens = split_str(expr, sep='=')
                 # floating poi
                 if len(tokens) == 1:
                     param_name = tokens[0]
@@ -193,26 +197,31 @@
                     continue
                 if len(tokens) != 2:
                     raise ValueError('invalid expression for parameterisation')
-                param_name = tokens[0]
-                values_expr = tokens[1]
-                tokens = split_str(values_expr, sep='_')
-                # fixed value
-                if len(tokens) == 1:
-                    values = [float(tokens[0])]
-                # scan across range
-                elif len(tokens) == 3:
-                    poi_min = float(tokens[0])
-                    poi_max = float(tokens[1])
-                    poi_step = float(tokens[2])
-                    values = np.arange(poi_min, poi_max + poi_step, poi_step)
-                else:
-                    raise ValueError('invalid expression for parameterisation')
+                param_name, values_expr = tokens
                 if param_name not in param_values:
                     param_values[param_name] = np.array([])
                 if None in param_values[param_name]:
-                    raise RuntimeError(f"the parameter {param_name} is being profiled and non-profiled at the same time "
-                                       f"in the parameter expression: {idp_param_str}")
-                param_values[param_name] = np.concatenate([param_values[param_name], values])
+                    raise RuntimeError(f"the parameter {param_name} is being profiled and non-profiled at the same time "
+                                       f"in the parameter expression: {component}")
+                # grouped expression
+                if values_expr.startswith('(') and values_expr.endswith(')'):
+                    values_expr_list = split_str(values_expr[1:-1], sep=',')
+                else:
+                    values_expr_list = [values_expr]
+                for value_expr in values_expr_list:
+                    tokens = split_str(value_expr, sep='_')
+                    # fixed value
+                    if len(tokens) == 1:
+                        values = [float(tokens[0])]
+                    # scan across range
+                    elif len(tokens) == 3:
+                        poi_min = float(tokens[0])
+                        poi_max = float(tokens[1])
+                        poi_step = float(tokens[2])
+                        values = np.arange(poi_min, poi_max + poi_step, poi_step)
+                    else:
+                        raise ValueError('invalid expression for parameterisation')
+                    param_values[param_name] = np.concatenate([param_values[param_name], values])
         param_names = list(param_values.keys())
         # convert to numpy arrays
         combinations = [np.array(param_values[param_name]) for param_name in param_names]
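The grouped value syntax above relies on use_paranthesis to keep commas inside (...) from acting as separators; a small sketch of the expected splitting behaviour:

from quickstats.utils.string_utils import split_str

expr = 'klambda=(-2_2_1,5),ktop=1'
# the comma inside the parentheses is preserved; the outer comma still splits
print(split_str(expr, sep=',', use_paranthesis=True))
# ['klambda=(-2_2_1,5)', 'ktop=1']
# klambda then expands to the scan -2..2 in steps of 1 plus the fixed value 5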
@@ -274,14 +283,14 @@ class ParamParser:
             param_points.append(point)
         attributes = list(self.attribute_parser)
         param_points = self.sort_param_points(param_points, attributes)
-        selected_param_points = self.select_param_points(param_points, filter_expr,
-                                                         exclude_expr, dict_key="parameters")
+        selected_param_points = self.get_selected_points(param_points, filter_expr,
+                                                         exclude_expr, dict_keys="parameters")
         return selected_param_points
     
     def get_internal_param_points(self, filter_expr:Optional[str]=None,
                                   exclude_expr:Optional[str]=None):
         param_points = self.parse_param_str(self.param_str)
-        selected_param_points = self.select_param_points(param_points, filter_expr,
+        selected_param_points = self.get_selected_points(param_points, filter_expr,
                                                          exclude_expr)
         return selected_param_points
     
@@ -291,66 +300,68 @@
             return []
         expr = remove_whitespace(expr)
         match_conditions = []
-        tokens = split_str(expr, sep=';', remove_empty=True)
-        for token in tokens:
+        # components separated by ; are subject to independent conditions
+        ind_components = split_str(expr, sep=';', remove_empty=True)
+        for component in ind_components:
+            conditions = split_str(component, sep=',', remove_empty=True, use_paranthesis=True)
             match_condition = {}
-            param_conditions = split_str(token, sep=',', remove_empty=True)
-            for param_condition in param_conditions:
-                subtokens = split_str(param_condition, sep='=')
-                if len(subtokens) != 2:
-                    raise RuntimeError(f"invalid parameter condition `{param_condition}` in the "
-                                       f"filter/exclude expression `{expr}`")
-                param_name, condition = subtokens
-                if "*" in condition:
-                    match_condition[param_name] = partial(MatchMode.Wildcard.match_func, y=condition)
+            for condition in conditions:
+                tokens = split_str(condition, sep='=')
+                if len(tokens) != 2:
+                    raise ValueError(f'invalid sub-expression: {condition}')
+                param_name, values_expr = tokens
+                if param_name not in match_condition:
+                    match_condition[param_name] = []
+                if values_expr.startswith('(') and values_expr.endswith(')'):
+                    values_expr_list = split_str(values_expr[1:-1], sep=',')
                 else:
-                    match_condition[param_name] = partial(MatchMode.Numerical.match_func, y=condition)
+                    values_expr_list = [values_expr]
+                for value_expr in values_expr_list:
+                    if "*" in value_expr:
+                        match_func = MatchMode.Wildcard.match_func
+                    else:
+                        match_func = MatchMode.Numerical.match_func
+                    match_condition[param_name].append(partial(match_func, y=value_expr))
             match_conditions.append(match_condition)
         return match_conditions
-    
-    @staticmethod
-    def select_param_point(parameters, conditions:List[Dict], default:bool=True):
-        selected = not default
-        for condition in conditions:
-            for param_name, match_func in condition.items():
-                if param_name not in parameters:
-                    continue
-                param_value = parameters[param_name]
-                if not match_func(param_value):
-                    return not default
-        return default
-
-    @staticmethod
-    def select_param_point(parameters, conditions:List[Dict], default:bool=True):
-        for condition in conditions:
-            for param_name, match_func in condition.items():
-                # consider not matched if any of the param-specific condition not satisfied
-                if (param_name not in parameters) or (not match_func(parameters[param_name])):
-                    break
-            else:
-                return default
-        return not default
-    
-    def select_param_points(self, param_points:List,
+    
+    @semistaticmethod
+    def get_selected_points(self, param_points:List,
                            filter_expr:Optional[str]=None,
                            exclude_expr:Optional[str]=None,
-                            dict_key:Optional=None):
-        selected_param_points = []
+                            dict_keys:Optional=None):
         filter_conditions = self.parse_filter_expr(filter_expr)
         exclude_conditions = self.parse_filter_expr(exclude_expr)
-        for param_point in param_points:
-            if dict_key:
-                parameters = param_point[dict_key]
-            else:
-                parameters = param_point
-            keep = True
-            if filter_conditions:
-                keep = self.select_param_point(parameters, filter_conditions, default=True)
-            if exclude_conditions:
-                keep &= not (self.select_param_point(parameters, exclude_conditions, default=False))
-            if keep:
-                selected_param_points.append(param_point)
-        return selected_param_points
+        def get_attrib(x):
+            if dict_keys is None:
+                return x
+            if isinstance(dict_keys, str):
+                return x[dict_keys]
+            if isinstance(dict_keys, list):
+                return dict(ChainMap(*[x[k] for k in dict_keys]))
+            raise ValueError('invalid value for "dict_keys"')
+        selected_points = param_points
+        if filter_expr is not None:
+            selected_points = [point for point in selected_points \
+                               if self.is_point_selected(get_attrib(point), filter_conditions)]
+        if exclude_expr is not None:
+            selected_points = [point for point in selected_points \
+                               if not self.is_point_selected(get_attrib(point), exclude_conditions)]
+        return selected_points
+    
+    @staticmethod
+    def is_point_selected(param_point, conditions):
+        for condition in conditions:
+            selected = True
+            for param_name, match_functions in condition.items():
+                # a condition on a parameter absent from the point cannot match
+                if param_name not in param_point:
+                    selected = False
+                    break
+                param_value = param_point[param_name]
+                selected &= any(match_func(param_value) for match_func in match_functions)
+            if selected:
+                return True
+        return False
 
     def sanity_check(self, internal_param_points:Dict, external_param_points:Dict):
         if not self.allow_none:
@@ -362,8 +373,8 @@
     def get_param_points(self, dirname:str="",
                          filter_expr:Optional[str]=None,
                          exclude_expr:Optional[str]=None):
-        external_param_points = self.get_external_param_points(dirname, filter_expr, exclude_expr)
-        internal_param_points = self.get_internal_param_points(filter_expr, exclude_expr)
+        external_param_points = self.get_external_param_points(dirname)
+        internal_param_points = self.get_internal_param_points()
         self.sanity_check(internal_param_points, external_param_points)
         if len(internal_param_points) == 0:
             internal_param_points = [{}]
@@ -379,4 +390,6 @@
             param_point['external_parameters'] = {**ext_params}
             param_point['internal_parameters'] = {**int_params}
             param_points.append(param_point)
+        param_points = self.get_selected_points(param_points, filter_expr, exclude_expr,
+                                                dict_keys=['internal_parameters', 'external_parameters'])
         return param_points
\ No newline at end of file
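Since is_point_selected is a plain staticmethod, its OR-within-parameter / AND-across-parameters logic can be checked in isolation. The matcher below is a stand-in for the MatchMode match_func partials built by parse_filter_expr (an assumption, since the real matchers are defined elsewhere in quickstats):

from functools import partial
from quickstats.parsers.param_parser import ParamParser

equals = lambda x, y: float(x) == float(y)   # stand-in matcher
# mimics what parse_filter_expr builds for 'klambda=(1,2.5)'
conditions = [{'klambda': [partial(equals, y='1'), partial(equals, y='2.5')]}]

print(ParamParser.is_point_selected({'klambda': 2.5}, conditions))  # True
print(ParamParser.is_point_selected({'klambda': 0.0}, conditions))  # False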
diff --git a/quickstats/plots/template.py b/quickstats/plots/template.py
index 38732aa08854151d1689870ad454fefe57d24ef4..fcd9ccef4fc373ebbc6558f4947fac8352f1604f 100644
--- a/quickstats/plots/template.py
+++ b/quickstats/plots/template.py
@@ -257,9 +257,10 @@ def ratio_frames(height_ratios:Tuple[int]=(3, 1), hspace:float=0.07,
     
     if logy:
         ax1.set_yscale('log')
-        
-    format_axis_ticks(ax1, x_axis=True, y_axis=True, x_axis_styles={"labelbottom":False},
-                      xtick_styles=styles['xtick'], ytick_styles=styles['ytick'], **styles['axis'])
+    
+    ax1_styles = combine_dict(styles['axis'], {"x_axis_styles": {"labelbottom": False}})
+    format_axis_ticks(ax1, x_axis=True, y_axis=True, xtick_styles=styles['xtick'],
+                      ytick_styles=styles['ytick'], **ax1_styles)
     format_axis_ticks(ax2, x_axis=True, y_axis=True, xtick_styles=styles['xtick'],
                       ytick_styles=styles['ytick'], **styles['axis'])
     
@@ -701,7 +702,7 @@ def draw_analysis_label(axis, loc=(0.05, 0.95), fontsize:float=25, status:str='int',
         main_texts.extend(main_text.split("//"))
     if colab is not None:
         # add collaboration and status text
-        colab_text = r"\bolditalic{" + colab + "} " + status_text
+        colab_text = r"\bolditalic{" + colab + "} " + status_text
         main_texts.append(colab_text)
     for text in main_texts:
         _, _, ymin, _ = draw_text(axis, xmin, ymin, text,
diff --git a/quickstats/plots/upper_limit_2D_plot.py b/quickstats/plots/upper_limit_2D_plot.py
index 3fc25773609da381041f4b15b7a45ee387c38d3e..323486fca5225997a7f2aad9c362da3e3d36a177 100644
--- a/quickstats/plots/upper_limit_2D_plot.py
+++ b/quickstats/plots/upper_limit_2D_plot.py
@@ -8,7 +8,7 @@ import matplotlib.lines as lines
 
 from quickstats.plots import AbstractPlot
 from quickstats.utils.common_utils import combine_dict
-from quickstats.maths.interpolation import get_x_intersections
+from quickstats.maths.interpolation import get_intervals_between_curves
 
 class UpperLimit2DPlot(AbstractPlot):
 
@@ -91,11 +91,14 @@ class UpperLimit2DPlot(AbstractPlot):
         'constraint_options': {
             'loc': (0.05, 0.25),
             'dy': 0.06,
+            'decimal_place': 2,
             'use_signal_strength': True,
-            'expected_interval_fmt': r'Expected: {xlabel}$\in [{lower_interval:.2f}, {upper_interval:.2f}]$',
-            'observed_interval_fmt': r'Observed: {xlabel}$\in [{lower_interval:.2f}, {upper_interval:.2f}]$',
-            'expected_length_fmt': r'Allowed range: {length:.2f}',
-            'observed_length_fmt': r'Allowed range: {length:.2f}'
+            'expected_interval_fmt': r'Expected: {xlabel}$\in [{lower_interval:.{decimal_place}f}'
+                                     r', {upper_interval:.{decimal_place}f}]$',
+            'observed_interval_fmt': r'Observed: {xlabel}$\in [{lower_interval:.{decimal_place}f}'
+                                     r', {upper_interval:.{decimal_place}f}]$',
+            'expected_length_fmt': r'Allowed range: {length:.{decimal_place}f}',
+            'observed_length_fmt': r'Allowed range: {length:.{decimal_place}f}'
         }
     }
 
@@ -306,13 +309,15 @@ class UpperLimit2DPlot(AbstractPlot):
         if (not use_signal_strength) and (scale_theory and (self.theory_func is not None)):
             _, scale_y, _, _ = self.get_theory_prediction(x)
             _, theo_y, _, _ = self.get_theory_prediction(theo_x)
-            intersections = get_x_intersections(x, y * scale_y, theo_x, theo_y)
+            interval = get_intervals_between_curves(x, y * scale_y, theo_x, theo_y)
         else:
-            intersections = get_x_intersections(x, y, theo_x, np.ones(theo_x.shape))
-        if len(intersections) != 2:
+            interval = get_intervals_between_curves(x, y, theo_x, np.ones(theo_x.shape))
+        if len(interval) == 0:
             return None, None
-        interval = intersections
-        length = (intersections[1] - intersections[0])
+        if np.isfinite(interval).all():
+            length = (interval[1] - interval[0])
+        else:
+            length = None
         return interval, length
     
     def draw_constraint(self, ax, data:pd.DataFrame,
@@ -325,9 +330,10 @@ class UpperLimit2DPlot(AbstractPlot):
                         option:Optional[Dict]=None):
         if option is None:
             option = self.config['constraint_options']
+        decimal_place = option['decimal_place']
         loc = option['loc']
         dy = option['dy']
-        txts = []
+        texts = []
         x = data.index.values
         for flag, column, key in [(draw_expected, '0', 'expected'),
                                   (draw_observed, 'obs', 'observed')]:
@@ -336,15 +342,17 @@
             y = data[column].values
             interval, length = self.get_interval_and_length(x, y, scale_theory=scale_theory)
             if draw_interval and (interval is not None):
-                interval_txt = option[f'{key}_interval_fmt'].format(xlabel=xlabel,
-                                                                    lower_interval=interval[0],
-                                                                    upper_interval=interval[1])
-                txts.append(interval_txt)
+                interval_str = option[f'{key}_interval_fmt'].format(xlabel=xlabel,
+                                                                    lower_interval=interval[0],
+                                                                    upper_interval=interval[1],
+                                                                    decimal_place=decimal_place)
+                interval_str = interval_str.replace('-inf', r'N.A.').replace('inf', 'N.A.')
+                texts.append(interval_str)
             if draw_length and (length is not None):
-                length_txt = option[f'{key}_length_fmt'].format(length=length)
-                txts.append(length_txt)
-        for i, txt in enumerate(txts):
-            ax.annotate(txt, (loc[0], loc[1] - i * dy), xycoords='axes fraction',
+                length_str = option[f'{key}_length_fmt'].format(length=length, decimal_place=decimal_place)
+                texts.append(length_str)
+        for i, text in enumerate(texts):
+            ax.annotate(text, (loc[0], loc[1] - i * dy), xycoords='axes fraction',
                         **self.styles['annotation'])
     
     def _get_candidate_columns(self, data:pd.DataFrame):
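The switch from raw intersections to intervals is easy to illustrate with a toy limit curve crossing a flat theory line (the printed values are approximate, coming from linear interpolation on the grid):

import numpy as np
from quickstats.maths.interpolation import get_intervals_between_curves

x = np.linspace(0, 10, 101)
limit = (x - 5) ** 2 / 5           # toy upper-limit curve
theory = np.ones_like(x)           # theory prediction normalised to 1

interval = get_intervals_between_curves(x, limit, x, theory)
print(interval)                    # approximately [2.76, 7.24], i.e. 5 +/- sqrt(5)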
diff --git a/quickstats/plots/variable_distribution_plot.py b/quickstats/plots/variable_distribution_plot.py
index c8d0d4944906b2292017a37f8ebe4480293addc2..826161d373f688492cdbf6eb79b166354273926c 100644
--- a/quickstats/plots/variable_distribution_plot.py
+++ b/quickstats/plots/variable_distribution_plot.py
@@ -62,7 +62,8 @@ class VariableDistributionPlot(AbstractPlot):
         'error_label_format': r'{label} $\pm \sigma$',
         'show_xerr': False,
         'stacked_label': ':stacked_{index}:',
-        'box_legend_handle': False
+        'box_legend_handle': False,
+        'save_hist_data': False
     }
     
     def __init__(self, data_map:Union["pandas.DataFrame", Dict[str, "pandas.DataFrame"]],
@@ -121,6 +122,7 @@ class VariableDistributionPlot(AbstractPlot):
         self.load_data(data_map)
         self.plot_options = plot_options
         self.label_map = label_map
+        self.reset_hist_data()
         super().__init__(color_cycle=color_cycle,
                          styles=styles,
                          analysis_label_options=analysis_label_options,
@@ -287,7 +289,10 @@
             if ylim[1] < np.max(y):
                 ylim[1] = np.max(y)
             ax.set_ylim(ylim)
-            
+        
+        if self.config['save_hist_data']:
+            self.hist_comparison_data.append(comparison_data)
+        
         return handle, error_handle
     
     def deduce_bin_range(self, samples:List[str], column_name:str,
@@ -393,6 +398,10 @@
         for i, target in enumerate(plot_options):
             self.update_legend_handles({target:handle[i]})
         return bin_edges, hist_data
+    
+    def reset_hist_data(self):
+        self.hist_data = {}
+        self.hist_comparison_data = []
     
     def draw(self, column_name:str, weight_name:Optional[str]=None,
              targets:Optional[List[str]]=None,
@@ -489,6 +498,7 @@
             bin_range = self.deduce_bin_range(relevant_samples, column_name,
                                               variable_scale=variable_scale)
             self.stdout.info(f"Using deduced bin range ({bin_range[0]:.3f}, {bin_range[1]:.3f})")
+        self.reset_hist_data()
         
         binned_data = {}
         target_bin_edges = {}
@@ -601,7 +611,10 @@
             self.decorate_comparison_axis(ax_ratio, **comparison_options)
             ax.set(xlabel=None)
             ax.tick_params(axis="x", labelbottom=False)
-            
+        
+        if self.config['save_hist_data']:
+            self.hist_data = binned_data
+        
         if comparison_options is not None:
             return ax, ax_ratio
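With the new save_hist_data switch the binned contents survive the draw call. A hypothetical usage sketch (data_map and the 'myy' column are placeholders, and the import path assumes the class is re-exported from quickstats.plots like the other plot classes):

from quickstats.plots import VariableDistributionPlot

plotter = VariableDistributionPlot(data_map)   # data_map: {sample: pandas.DataFrame}
plotter.config['save_hist_data'] = True
plotter.draw('myy', targets=['sig', 'bkg'])
print(plotter.hist_data)             # binned data kept after drawing
print(plotter.hist_comparison_data)  # ratio-panel data, if a comparison was drawn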
diff --git a/quickstats/utils/common_utils.py b/quickstats/utils/common_utils.py
index d0d234aa1667b36c4cf7139f71c59576b4e7c290..2324c987a126083bcf6dbc3c5b35af14b55d8c32 100644
--- a/quickstats/utils/common_utils.py
+++ b/quickstats/utils/common_utils.py
@@ -12,7 +12,7 @@ import collections.abc
 
 import numpy as np
 
-from .string_utils import split_str
+from .string_utils import split_str, parse_as_dict
 
 def timely_info(green_text, normal_text, adjust=40):
     print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '\033[92m[INFO]\033[0m',
           '\033[92m{}\033[0m'.format(green_text).rjust(40, ' '), normal_text)
 
@@ -213,17 +213,44 @@ def get_class_that_defined_method(meth):
             return cls
     return getattr(meth, '__objclass__', None)
 
-def batch_makedirs(dirnames:Union[str, List[str], Dict[str, str]]):
+def batch_makedirs(dirnames:Union[str, List[str], Dict[str, str]]) -> None:
+    """
+    Create multiple directories in a batch.
+
+    This function accepts a single directory name as a string, a list of directory
+    names, or a dictionary where the values are directory names. It then creates
+    these directories if they do not already exist.
+
+    Parameters
+    ----------
+    dirnames : Union[str, List[str], Dict[str, str]]
+        The directory name(s) to be created.
+        - If it's a string, a single directory will be created.
+        - If it's a list, each element is treated as a directory name.
+        - If it's a dictionary, each value is treated as a directory name.
+
+    Raises
+    ------
+    ValueError
+        If `dirnames` is not a string, a list, or a dictionary.
+
+    Examples
+    --------
+    >>> batch_makedirs('new_dir')
+    >>> batch_makedirs(['new_dir1', 'new_dir2'])
+    >>> batch_makedirs({'dir1': 'new_dir1', 'dir2': 'new_dir2'})
+    """
     if isinstance(dirnames, str):
-        batch_makedirs([dirnames])
-    elif isinstance(dirnames, list):
-        for dirname in dirnames:
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
+        dirnames = [dirnames]
     elif isinstance(dirnames, dict):
-        for key in dirnames:
-            if not os.path.exists(dirnames[key]):
-                os.makedirs(dirnames[key])
+        dirnames = list(dirnames.values())
+    
+    if isinstance(dirnames, list):
+        for dirname in dirnames:
+            abs_dirname = os.path.abspath(dirname)
+            os.makedirs(abs_dirname, exist_ok=True)
+    else:
+        raise ValueError('invalid format for "dirnames"')
 
 def set_scripts_path(scripts_path, undo=False):
     if (scripts_path in sys.path) and (undo==True):
@@ -386,4 +413,38 @@ def filter_dataframe_by_index_values(df, index_values:Union[Tuple[List], List],
         index_levels = (index_levels,)
     for values, level in zip(index_values, index_levels):
         df = df.loc[df.index.get_level_values(level).isin(values)]
-    return df
\ No newline at end of file
+    return df
+
+def parse_config_dict(expr:Optional[Union[str, Dict]]=None):
+    if expr is None:
+        return {}
+    if isinstance(expr, str):
+        return parse_as_dict(expr)
+    if isinstance(expr, dict):
+        return expr
+    raise ValueError(f'expr of type "{type(expr)}" is not convertible to a config dict')
+
+def update_config_dict(orig_dict, new_dict, allow_overlap_keys:bool=True):
+    orig_dict = parse_config_dict(orig_dict)
+    new_dict = parse_config_dict(new_dict)
+    if not allow_overlap_keys:
+        overlap_keys = list(set(orig_dict.keys()) & set(new_dict.keys()))
+        if overlap_keys:
+            raise RuntimeError(f'found overlap keys between two config dict: {", ".join(overlap_keys)}')
+    return combine_dict(orig_dict, new_dict)
+
+def list_of_dict_to_dict_of_list(source:List[Dict], use_first_keys:bool=True):
+    if use_first_keys:
+        return {k: [item[k] for item in source] for k in source[0]}
+    common_keys = set.intersection(*map(set, source))
+    return {k: [item[k] for item in source] for k in common_keys}
+
+def dict_of_list_to_list_of_dict(source:Dict[str, List]):
+    return [dict(zip(source, t)) for t in zip(*source.values())]
+
+def save_as_json(data:Dict, outname:str,
+                 indent:int=2, truncate:bool=True):
+    with open(outname, "w") as file:
+        json.dump(data, file, indent=indent)
+        if truncate:
+            file.truncate()
\ No newline at end of file
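The new record/column converters and config merging behave as the implementations above suggest:

from quickstats.utils.common_utils import (dict_of_list_to_list_of_dict,
                                           list_of_dict_to_dict_of_list,
                                           update_config_dict)

records = [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]
columns = list_of_dict_to_dict_of_list(records)
print(columns)                                # {'x': [1, 3], 'y': [2, 4]}
print(dict_of_list_to_list_of_dict(columns))  # round-trips back to records

# string configs are parsed via parse_as_dict before merging
print(update_config_dict({'a': 1}, 'b=2'))    # {'a': 1, 'b': 2}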
diff --git a/quickstats/utils/roofit_utils.py b/quickstats/utils/roofit_utils.py
index 6b1bacd0230edd3ea867fc1025cff25a4bde453f..497337c5294a7173f21733d6433eb850f6daf89b 100644
--- a/quickstats/utils/roofit_utils.py
+++ b/quickstats/utils/roofit_utils.py
@@ -42,7 +42,9 @@ def get_variable_attributes(variable:ROOT.RooAbsReal, asym_error:bool=False):
             'value': variable.getVal()
         }
     else:
-        raise ValueError("variable must be an instance of RooAbsReal")
+        attributes = {
+            'name' : variable.GetName()
+        }
     return attributes
 
 def variable_collection_to_dataframe(var_collection:Union[list, ROOT.RooArgSet],
@@ -81,7 +83,7 @@ def construct_categorized_pdf_dataset(pdf:"ROOT.RooAbsPdf", dataset:"ROOT.RooDataSet",
 
 def factorize_pdf(observables:"ROOT.RooArgSet", pdf:"ROOT.RooAbsPdf", constraints:"ROOT.RooArgSet"):
     pdf_class = pdf.ClassName()
-    if pdf_class == "RooProdPdf":
+    if pdf.InheritsFrom("RooProdPdf"):
         new_factors = ROOT.RooArgList()
         new_owned = ROOT.RooArgSet()
         pdf_list = pdf.pdfList()
@@ -108,7 +110,7 @@
         factorized_pdf.addOwnedComponents(new_owned)
         copy_attributes(pdf, factorized_pdf)
         return factorized_pdf
-    elif pdf_class == "RooSimultaneous":
+    elif pdf.InheritsFrom("RooSimultaneous"):
         cat = pdf.indexCat().Clone()
         n_bins = cat.numBins("")
         factorized_pdfs = []
@@ -142,11 +144,10 @@
     elif pdf.dependsOn(observables):
         return pdf
     else:
-        if not constraints.contains(pdf):
+        if (not constraints.contains(pdf)) and (not pdf.getAttribute('ignoreConstraint')):
            constraints.add(pdf)
         return 0
-
 def rebuild_simultaneous_pdf(observables:"ROOT.RooArgSet", sim_pdf:"ROOT.RooSimultaneous"):
     assert sim_pdf.ClassName() == "RooSimultaneous"
     constraints = ROOT.RooArgList()
diff --git a/quickstats/utils/roostats_utils.py b/quickstats/utils/roostats_utils.py
index 69091c72098bc6dc720d01d2fb3a9b66f3fadc95..43832d8dda3f0c219a374a2581d2d72f0164996e 100644
--- a/quickstats/utils/roostats_utils.py
+++ b/quickstats/utils/roostats_utils.py
@@ -8,6 +8,14 @@ from cppyy.gbl.std import vector
 import ROOT
 import ROOT.RooStats as RS
 
+from quickstats import DescriptiveEnum
+
+class PriorPdfType(DescriptiveEnum):
+    NONE = (0, "No prior")
+    FLAT = (1, "Uniform prior")
+    UNIFORM = (2, "Uniform prior")
+    INV_SQRT = (3, "Reciprocal of square root of POI")
+
 def get_null_distribution(htr:ROOT.RooStats.HypoTestResult)->np.ndarray:
     return np.array(htr.GetNullDistribution().GetSamplingDistribution().data())
 
@@ -213,4 +221,41 @@ def get_hypotest_data(htr):
         cl_sb_err2 = pow(data['expected']['CLsplusbError'], 2)
         data['expected']['CLsError'] = np.sqrt(cl_sb_err2 + cl_b_err2 * pow(data['expected']['CLs'], 2)) / data['expected']['CLb']
-    return data
\ No newline at end of file
+    return data
+
+def set_prior_pdf(ws: "ROOT.RooWorkspace",
+                  mc: "ROOT.RooStats.ModelConfig",
+                  pois: "ROOT.RooArgSet",
+                  prior_type:Optional[Union[PriorPdfType, str]]="flat",
+                  prior_name:str="prior_pdf"):
+    if prior_type is None:
+        return
+    try:
+        prior_type = PriorPdfType.parse(prior_type)
+    except Exception:
+        pass
+    if prior_type == PriorPdfType.NONE:
+        return
+    if prior_type in [PriorPdfType.UNIFORM, PriorPdfType.FLAT]:
+        prior_pdf = ROOT.RooUniform(prior_name, prior_name, pois)
+        ws.Import(prior_pdf)
+        mc.SetPriorPdf(prior_pdf)
+        return
+
+    def set_prior_from_expr(expr:str):
+        if len(pois) != 1:
+            raise RuntimeError('number of POIs must be 1 when using the inverse square root prior')
+        poi_name = pois.first().GetName()
+        prior_expr = f"EXPR::{prior_name}(\"{expr}\",{poi_name})"
+        ws.factory(prior_expr)
+        prior_pdf = ws.pdf(prior_name)
+        mc.SetPriorPdf(prior_pdf)
+
+    if prior_type == PriorPdfType.INV_SQRT:
+        set_prior_from_expr("1/sqrt(@0)")
+    elif isinstance(prior_type, str) and ("@0" in prior_type):
+        set_prior_from_expr(prior_type)
+    elif (prior_type and ws.pdf(prior_type)):
+        mc.SetPriorPdf(ws.pdf(prior_type))
+    else:
+        raise RuntimeError(f'unknown prior type: {prior_type}')
\ No newline at end of file
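A minimal sketch of attaching a flat prior to a ModelConfig with the new helper (this assumes DescriptiveEnum.parse accepts the case-insensitive member name 'flat', which is not shown in this patch):

import ROOT
from quickstats.utils.roostats_utils import set_prior_pdf

ws = ROOT.RooWorkspace("w")
ws.factory("Gaussian::pdf(x[-10,10], mu[0,-5,5], sigma[1])")
mc = ROOT.RooStats.ModelConfig("mc", ws)
pois = ROOT.RooArgSet(ws.var("mu"))

set_prior_pdf(ws, mc, pois, prior_type="flat")
print(mc.GetPriorPdf().ClassName())   # RooUniform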
diff --git a/quickstats/utils/root_utils.py b/quickstats/utils/root_utils.py
index dc25cd395e5634bb7a6a8f4108dc976880ab9798..1da86d46f639677b557cb61a0f32275f5d85a379 100644
--- a/quickstats/utils/root_utils.py
+++ b/quickstats/utils/root_utils.py
@@ -377,4 +377,9 @@ def get_th1d_model(bins:Union[int, Sequence]=128,
         xlow, xup = 0., 0.
     else:
         xlow, xup = bin_range
-    return ROOT.RDF.TH1DModel(name, title, bins, xlow, xup)
\ No newline at end of file
+    return ROOT.RDF.TH1DModel(name, title, bins, xlow, xup)
+
+def close_read_cache(rfile:"ROOT.TFile"):
+    read_cache = rfile.GetCacheRead()
+    if read_cache:
+        read_cache.Close()
\ No newline at end of file
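close_read_cache is a thin convenience around TFile::GetCacheRead; a hypothetical usage sketch (the file name is a placeholder):

import ROOT
from quickstats.utils.root_utils import close_read_cache

rfile = ROOT.TFile.Open("events.root")   # placeholder file name
# ... read whatever is needed ...
close_read_cache(rfile)   # closes the attached TFileCacheRead, if one exists
rfile.Close()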
""" - items = s.split(sep) + if use_paranthesis: + if sep is None: + raise ValueError('separator can not be None when "use_paranthesis" option is set to True') + items = re.split(sep + r'\s*(?![^()]*\))', s) + else: + items = s.split(sep) if strip: items = [item.strip() for item in items] if remove_empty: @@ -74,6 +83,11 @@ def split_str(s: str, sep: str = None, strip: bool = True, remove_empty: bool = items = [cast(item) for item in items] return items +def split_str_excl_paranthesis(s: str, sep: str = ",", strip: bool = True, remove_empty: bool = False) -> List: + regex = re.compile(sep + r'\s*(?![^()]*\))') + + + whitespace_trans = str.maketrans('', '', " \t\r\n\v") newline_trans = str.maketrans('', '', "\r\n") @@ -125,4 +139,41 @@ def remove_neg_zero(s:str): print(remove_neg_zero(string)) # outputs: "The temperature is 0.000 degrees." """ - return neg_zero_regex.sub(r'\1', s) \ No newline at end of file + return neg_zero_regex.sub(r'\1', s) + + +def parse_as_dict(s:str, item_sep:str=',', key_value_sep:str='='): + """ + Parse a string into a dictionary based on given item and key-value separators. + + Parameters + ---------- + s : str + The input string to be parsed into a dictionary. + item_sep : (optional) str, default = ',' + The separator between items + key_value_sep : (optional) str, default = '=' + The separator between keys and values + + Returns + ------- + dict + A dictionary containing the parsed key-value pairs. + + Examples + -------- + >>> parse_as_dict("name='John',age=25") + {'name': 'John', 'age': 25} + """ + tokens = split_str(s, sep=item_sep, strip=True, remove_empty=True) + result = {} + for token in tokens: + subtokens = split_str(token, sep=key_value_sep, strip=True) + if len(subtokens) != 2: + raise ValueError(f'invalid key-value format: {token}') + key, value = subtokens + if key in result: + raise RuntimeError(f'multiple values specified for the key "{key}"') + result[key] = ast.literal_eval(value) + return result + \ No newline at end of file