From 6be194c82b44677d32bd9545f1d3f1d92677c32d Mon Sep 17 00:00:00 2001 From: Carla Marin <cmarin@ecm.ub.edu> Date: Thu, 18 Jan 2018 21:17:16 +0100 Subject: [PATCH 1/4] debug syst_toys to run fixed_params toys --- analysis/toys/syst_toys.py | 10 +++++----- analysis/toys/systematics.py | 27 +++++++++++++++------------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/analysis/toys/syst_toys.py b/analysis/toys/syst_toys.py index 7877665..0939fea 100644 --- a/analysis/toys/syst_toys.py +++ b/analysis/toys/syst_toys.py @@ -111,7 +111,7 @@ def run(config_files, link_from, verbose): # Set seed job_id = get_job_id() # Start looping - fit_results = defaultdict(list) + data_res = [] logger.info("Starting sampling-fit loop (print frequency is 20)") initial_mem = memory_usage() initial_time = default_timer() @@ -135,13 +135,13 @@ def run(config_files, link_from, verbose): except ValueError: raise RuntimeError() except Exception: - logger.exception() + #logger.exception() raise RuntimeError() # TODO: provide more information? # Now results are in fit_parameters result = FitResult.from_roofit(fit_result).to_plain_dict() result['fitnum'] = fit_num result['seed'] = seed - fit_results[fit_num].append(result) + data_res.append(result) _root.destruct_object(fit_result) _root.destruct_object(dataset) logger.debug("Cleaning up") @@ -149,7 +149,7 @@ def run(config_files, link_from, verbose): logger.info("--> Memory leakage: %.2f MB/sample-fit", (memory_usage() - initial_mem)/nfits) logger.info("--> Spent %.0f ms/sample-fit", (default_timer() - initial_time)*1000.0/nfits) logger.info("Saving to disk") - data_frame = pd.DataFrame(fit_results) + data_frame = pd.DataFrame(data_res) fit_result_frame = pd.concat([data_frame, pd.concat([pd.DataFrame({'jobid': [job_id]})] * data_frame.shape[0]).reset_index(drop=True)], @@ -163,7 +163,7 @@ def run(config_files, link_from, verbose): # First fit results hdf_file.append('fit_results', fit_result_frame) # Generator info - hdf_file.append('input_values', pd.DataFrame([systematic.get_input_values()])) + hdf_file.append('input_values', pd.DataFrame(systematic.get_input_values())) logger.info("Written output to %s", toy_fit_file) if 'link-from' in config: logger.info("Linked to %s", config['link-from']) diff --git a/analysis/toys/systematics.py b/analysis/toys/systematics.py index 5d80272..4e8776c 100644 --- a/analysis/toys/systematics.py +++ b/analysis/toys/systematics.py @@ -117,7 +117,7 @@ class SystematicToys(object): """ # TODO: Add weights? if randomize: - self.logger("Applying randomization") + logger.debug("Applying randomization") self.randomize() obs = list_to_rooargset(self._model.get_observables()) datasets_to_merge = [] @@ -175,8 +175,8 @@ class SystematicToys(object): """ if self._input_values is None: - self._input_values = {var.GetName(): [var.getVal()] - for var in self._model.get_gen_parameters() + self._model.get_yield_vars()} + self._input_values = {"{}_gen".format(var.GetName()): [var.getVal()] + for var in list(self._model.get_gen_parameters()) + self._model.get_yield_vars()} return self._input_values @@ -239,19 +239,21 @@ class FixedParamsSyst(SystematicToys): fit_result = FitResult.from_yaml_file(result_config['result']) self._param_translation.update(result_config['param_names']) cov_matrices.append(fit_result.get_covariance_matrix(self._param_translation.keys())) - central_values.append(np.diag([float(fit_result.get_fit_parameter(param)[0]) - for param in self._param_translation.keys()])) + central_values.extend([float(fit_result.get_fit_parameter(param)[0]) + for param in self._param_translation.keys()]) # Check that there is a correspondence between the fit result and parameters in the generation PDF self._cov_matrix = make_block(*cov_matrices) - self._central_values = make_block(*central_values) + self._central_values = central_values self._pdf_index = {} for fit_param in self._param_translation.values(): found = False - for pdf_num, pdf in enumerate(self._gen_pdfs): - if fit_param in [var.GetName() for var in iterate_roocollection(pdf.getVariables())]: - self._pdf_index[fit_param] = pdf_num - found = True - break + for label, pdf_list in self._gen_pdfs.items(): + for pdf_num, pdf in enumerate(pdf_list): + if fit_param in [var.GetName() for var in iterate_roocollection(pdf.getVariables())]: + self._pdf_index[fit_param] = (label, pdf_num) + found = True + break + if found: break if not found: raise RuntimeError("Cannot find parameter {} in the physics model".format(fit_param)) @@ -267,7 +269,8 @@ class FixedParamsSyst(SystematicToys): """ random_values = np.random.multivariate_normal(self._central_values, self._cov_matrix) for param_num, (param_name, pdf_index) in enumerate(self._pdf_index.items()): - self._gen_pdfs[pdf_index].getVariables[param_name].setVal(random_values[param_num]) + pdf_label, pdf_num = pdf_index + self._gen_pdfs[pdf_label][pdf_num].getVariables()[param_name].setVal(random_values[param_num]) return len(random_values) -- GitLab From 84086c864b239e5a7c822be45a4ade926ed0277a Mon Sep 17 00:00:00 2001 From: Carla Marin <cmarin@ecm.ub.edu> Date: Fri, 19 Jan 2018 08:48:03 +0100 Subject: [PATCH 2/4] save covariance matrix and minor changes --- analysis/toys/syst_toys.py | 29 ++++++++++++++++++++++++++--- analysis/toys/systematics.py | 5 +++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/analysis/toys/syst_toys.py b/analysis/toys/syst_toys.py index 0939fea..59f6a74 100644 --- a/analysis/toys/syst_toys.py +++ b/analysis/toys/syst_toys.py @@ -13,6 +13,7 @@ These generate and fit in one go with some random variation of one or more param from __future__ import print_function, division, absolute_import import argparse +import os from collections import defaultdict from timeit import default_timer @@ -111,7 +112,7 @@ def run(config_files, link_from, verbose): # Set seed job_id = get_job_id() # Start looping - data_res = [] + fit_results = defaultdict() logger.info("Starting sampling-fit loop (print frequency is 20)") initial_mem = memory_usage() initial_time = default_timer() @@ -138,10 +139,13 @@ def run(config_files, link_from, verbose): #logger.exception() raise RuntimeError() # TODO: provide more information? # Now results are in fit_parameters - result = FitResult.from_roofit(fit_result).to_plain_dict() + result_roofit = FitResult.from_roofit(fit_result) + result = result_roofit.to_plain_dict() + result['cov_matrix'] = result_roofit.get_covariance_matrix() + result['param_names'] = result_roofit.get_fit_parameters().keys() result['fitnum'] = fit_num result['seed'] = seed - data_res.append(result) + fit_results[fit_num] = result _root.destruct_object(fit_result) _root.destruct_object(dataset) logger.debug("Cleaning up") @@ -149,6 +153,20 @@ def run(config_files, link_from, verbose): logger.info("--> Memory leakage: %.2f MB/sample-fit", (memory_usage() - initial_mem)/nfits) logger.info("--> Spent %.0f ms/sample-fit", (default_timer() - initial_time)*1000.0/nfits) logger.info("Saving to disk") + data_res = [] + cov_matrices = {} + # Get covariance matrices + for fit_num, fit_res in fit_results.items(): + fit_res = fit_res.copy() + fit_res['model_name'] = model_name # needed for indexing + fit_res['fit_strategy'] = fit_strategy + + cov_folder = os.path.join(str(job_id), str(fit_res['fitnum'])) + param_names = fit_res.pop('param_names') + cov_matrices[cov_folder] = pd.DataFrame(fit_res.pop('cov_matrix'), + index=param_names, + columns=param_names) + data_res.append(fit_res) data_frame = pd.DataFrame(data_res) fit_result_frame = pd.concat([data_frame, pd.concat([pd.DataFrame({'jobid': [job_id]})] @@ -162,8 +180,13 @@ def run(config_files, link_from, verbose): with modify_hdf(toy_fit_file) as hdf_file: # First fit results hdf_file.append('fit_results', fit_result_frame) + # Save covarinance matrix under 'covariance/jobid/fitnum + for cov_folder, cov_matrix in cov_matrices.items(): + cov_path = os.path.join('covariance', cov_folder) + hdf_file.append(cov_path, cov_matrix) # Generator info hdf_file.append('input_values', pd.DataFrame(systematic.get_input_values())) + logger.info("Written output to %s", toy_fit_file) if 'link-from' in config: logger.info("Linked to %s", config['link-from']) diff --git a/analysis/toys/systematics.py b/analysis/toys/systematics.py index 4e8776c..dc58a58 100644 --- a/analysis/toys/systematics.py +++ b/analysis/toys/systematics.py @@ -243,7 +243,7 @@ class FixedParamsSyst(SystematicToys): for param in self._param_translation.keys()]) # Check that there is a correspondence between the fit result and parameters in the generation PDF self._cov_matrix = make_block(*cov_matrices) - self._central_values = central_values + self._central_values = np.array(central_values) self._pdf_index = {} for fit_param in self._param_translation.values(): found = False @@ -253,7 +253,8 @@ class FixedParamsSyst(SystematicToys): self._pdf_index[fit_param] = (label, pdf_num) found = True break - if found: break + if found: + break if not found: raise RuntimeError("Cannot find parameter {} in the physics model".format(fit_param)) -- GitLab From 33e6861c2607f52fcbf9b7306dd2f3438b6540a8 Mon Sep 17 00:00:00 2001 From: Carla Marin <cmarin@ecm.ub.edu> Date: Fri, 19 Jan 2018 10:39:59 +0100 Subject: [PATCH 3/4] rm _param_translation from self --- analysis/toys/systematics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/analysis/toys/systematics.py b/analysis/toys/systematics.py index dc58a58..6f3bdf6 100644 --- a/analysis/toys/systematics.py +++ b/analysis/toys/systematics.py @@ -230,22 +230,22 @@ class FixedParamsSyst(SystematicToys): super(FixedParamsSyst, self).__init__(model, config=config, acceptance=acceptance) cov_matrices = [] central_values = [] - self._param_translation = OrderedDict() + param_translation = OrderedDict() # Load fit results and their covariance matrices syst = config['params'] if not isinstance(syst, (list, tuple)): syst = [syst] for result_config in syst: fit_result = FitResult.from_yaml_file(result_config['result']) - self._param_translation.update(result_config['param_names']) - cov_matrices.append(fit_result.get_covariance_matrix(self._param_translation.keys())) + param_translation.update(result_config['param_names']) + cov_matrices.append(fit_result.get_covariance_matrix(param_translation.keys())) central_values.extend([float(fit_result.get_fit_parameter(param)[0]) - for param in self._param_translation.keys()]) + for param in param_translation.keys()]) # Check that there is a correspondence between the fit result and parameters in the generation PDF self._cov_matrix = make_block(*cov_matrices) self._central_values = np.array(central_values) self._pdf_index = {} - for fit_param in self._param_translation.values(): + for fit_param in param_translation.values(): found = False for label, pdf_list in self._gen_pdfs.items(): for pdf_num, pdf in enumerate(pdf_list): -- GitLab From 16fcdea7d5e50dc4d67e10766e0df5be5bf87ade Mon Sep 17 00:00:00 2001 From: Carla Marin <cmarin@ecm.ub.edu> Date: Fri, 19 Jan 2018 11:02:11 +0100 Subject: [PATCH 4/4] use plain dict --- analysis/toys/syst_toys.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/analysis/toys/syst_toys.py b/analysis/toys/syst_toys.py index 59f6a74..a8f79a5 100644 --- a/analysis/toys/syst_toys.py +++ b/analysis/toys/syst_toys.py @@ -14,7 +14,6 @@ from __future__ import print_function, division, absolute_import import argparse import os -from collections import defaultdict from timeit import default_timer import pandas as pd @@ -112,7 +111,7 @@ def run(config_files, link_from, verbose): # Set seed job_id = get_job_id() # Start looping - fit_results = defaultdict() + fit_results = {} logger.info("Starting sampling-fit loop (print frequency is 20)") initial_mem = memory_usage() initial_time = default_timer() -- GitLab