From 5205ac6132340401d5415bf01112cd1bba2771af Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Fri, 3 Nov 2017 18:59:11 +0100 Subject: [PATCH 1/7] wip: from_plain_dict and from_toys_hdf added, basics working (untested) --- analysis/fit/result.py | 98 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 2 deletions(-) diff --git a/analysis/fit/result.py b/analysis/fit/result.py index f86eda6..03231d5 100644 --- a/analysis/fit/result.py +++ b/analysis/fit/result.py @@ -9,12 +9,15 @@ from collections import OrderedDict import copy +import os +import warnings import numpy as np +import pandas as pd from analysis.utils.config import load_config, write_config, ConfigError from analysis.utils.root import iterate_roocollection -from analysis.utils.paths import get_fit_result_path +from analysis.utils.paths import get_fit_result_path, get_toy_fit_path _SUFFIXES = ('', '_err_hesse', '_err_plus', '_err_minus') @@ -167,7 +170,7 @@ class FitResult(object): return self @ensure_non_initialized - def from_hdf(self, name): # TODO: which path func? + def from_toy_hdf(self, name, iloc=None): # TODO: which path func? """Initialize from a hdf file. Arguments: @@ -177,7 +180,98 @@ class FitResult(object): self """ + result = {} + # obtain dataframe + with pd.HDFStore(get_toy_fit_path(name), mode='r') as store: # TODO: is toy fit path right? + toy_results = store['fit_results'] + fit_results = toy_results['fit_results'] + + # extract the right fit + if iloc: + fit_result = fit_results.iloc[iloc] + elif False: + pass + else: + raise ValueError("No matching key is given for the toy fit result.") + + fit_result = dict(fit_result) + + # extract jobid, fitnum + jobid = fit_result.pop('jobid') + fitnum = fit_result.pop('fitnum') + + # obtain covariance matrix + cov_matrix_path = os.path.join('covariance', str(jobid), str(fitnum)) + with pd.HDFStore(get_toy_fit_path(name), mode='r') as store: + cov_matrix = store[cov_matrix_path] + + # store results + self.from_plain_dict(plain_dict=fit_result, skip_cov=True) + + self._result['covariance-matrix']['matrix'] = cov_matrix + + self._result = result + + + + return self + @ensure_non_initialized + def from_plain_dict(self, plain_dict, skip_cov=True): + """Initialize from a dict *plain_dict*, inverse to :py:func:`~FitResult.to_plain_dict` + + Arguments: + plain_dict (dict): Dict containing the fit result as created by + :py:func:`~FitResult.to_plain_dict` + skip_cov (bool): If True, the covariance matrix is not stored. + + Return: + self + + """ + # TODO: dirty extraction: relies on order: parameters, consts, other stuff + + result = {} + plain_dict = plain_dict.copy() + + # extract the parameters + # TODO: better extraction method? + possible_params = list(plain_dict.keys()) + fit_parameters = OrderedDict() + n_suffixes = len(_SUFFIXES) + while len(possible_params) >= n_suffixes and all(isinstance(p, str) for p in possible_params[:n_suffixes]): + if n_suffixes == 0: + break + param_name = possible_params[0][:max(len(possible_params[0]) - len(_SUFFIXES[0]), 0)] + if all((param_name + suf == p for p, suf in zip(possible_params, _SUFFIXES))): + fit_parameters[param_name] = (plain_dict.pop(name) for name in possible_params[:n_suffixes]) + possible_params = possible_params[n_suffixes:] + result['fit-parameters'] = fit_parameters + + n_const_params = result.index('status_migrad') # TODO: better limit the const params + result['const-parameters'] = OrderedDict(plain_dict.popitem(last=False) + for _ in range(n_const_params)) + + # TODO: parameter-initial missing (not stored in hdf currently) + + fit_status = {'MIGRAD', plain_dict.pop('status_migrad'), + 'HESSE', plain_dict.pop('status_hesse'), + 'MINOS', plain_dict.pop('status_minos')} + result['status'] = fit_status + result['edm'] = plain_dict.pop('edm') + result['covariance-matrix'] = {'quality': plain_dict.pop('cov_quality')} + if skip_cov: + plain_dict.pop('cov_matrix', None) # to see whats left over + else: + # TODO: convert cov_matrix as it is given as A1 in to_plain_dict + result['covariance-matrix']['matrix'] = plain_dict['cov_matrix'] + + if plain_dict: # TODO: warning? or even assert? + warnings.warn('Possible information loss! The following information ' + 'was contained in the HDF but is not in the FitResult: ' + '{}'.format(plain_dict)) + + self._result = result return self @ensure_initialized -- GitLab From f11ae86776cfc164fe16060d29b4d23e70c26436 Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Sat, 4 Nov 2017 15:19:28 +0100 Subject: [PATCH 2/7] from_toy_fit basic body setup --- analysis/fit/result.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/analysis/fit/result.py b/analysis/fit/result.py index 03231d5..299b482 100644 --- a/analysis/fit/result.py +++ b/analysis/fit/result.py @@ -174,7 +174,9 @@ class FitResult(object): """Initialize from a hdf file. Arguments: - name (str): + name (str): name of the hdf file + iloc (int): The Index based LOCation of the fit result, as + in :py:meth:`pd.DataFrame.iloc` Return: self @@ -205,14 +207,25 @@ class FitResult(object): with pd.HDFStore(get_toy_fit_path(name), mode='r') as store: cov_matrix = store[cov_matrix_path] + # get toy specific values + seed = fit_result.pop('seed') + fit_strategy = fit_result.pop('fit_strategy') + model_name = fit_result.pop('model_name') + # store results - self.from_plain_dict(plain_dict=fit_result, skip_cov=True) + self.from_plain_dict(plain_dict=fit_result, skip_cov=True) # skip as we have matrix, not A1 + # store covariance matrix self._result['covariance-matrix']['matrix'] = cov_matrix - self._result = result - + # store toy specific values + self._result['jobid'] = jobid + self._result['fitnum'] = fitnum + self._result['seed'] = seed + self._result['fit_strategy'] = fit_strategy + self._result['model_name'] = model_name + self._result = result return self @@ -238,14 +251,14 @@ class FitResult(object): # TODO: better extraction method? possible_params = list(plain_dict.keys()) fit_parameters = OrderedDict() - n_suffixes = len(_SUFFIXES) - while len(possible_params) >= n_suffixes and all(isinstance(p, str) for p in possible_params[:n_suffixes]): - if n_suffixes == 0: + while (len(possible_params) >= len(_SUFFIXES) and + all(isinstance(p, str) for p in possible_params[:len(_SUFFIXES)])): + if len(_SUFFIXES) == 0: break param_name = possible_params[0][:max(len(possible_params[0]) - len(_SUFFIXES[0]), 0)] if all((param_name + suf == p for p, suf in zip(possible_params, _SUFFIXES))): - fit_parameters[param_name] = (plain_dict.pop(name) for name in possible_params[:n_suffixes]) - possible_params = possible_params[n_suffixes:] + fit_parameters[param_name] = (plain_dict.pop(name) for name in possible_params[:len(_SUFFIXES)]) + possible_params = possible_params[len(_SUFFIXES):] result['fit-parameters'] = fit_parameters n_const_params = result.index('status_migrad') # TODO: better limit the const params @@ -322,9 +335,11 @@ class FitResult(object): pandas.DataFrame """ + # do NOT change order below pandas_dict = OrderedDict(((param_name + suffix, val) for param_name, param in self._result['fit-parameters'].items() for val, suffix in zip(param, _SUFFIXES))) + # do NOT change order below pandas_dict.update(OrderedDict((param_name, val) for param_name, val in self._result['const-parameters'].items())) pandas_dict['status_migrad'] = self._result['status'].get('MIGRAD', -1) -- GitLab From 846f3d5f54ef521815b0c3b254b0a43e36aaae72 Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Fri, 1 Dec 2017 16:56:49 +0100 Subject: [PATCH 3/7] Add toy covariance matrix loading --- analysis/toys/tools.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/analysis/toys/tools.py b/analysis/toys/tools.py index 8c6bcbb..17962b0 100644 --- a/analysis/toys/tools.py +++ b/analysis/toys/tools.py @@ -48,8 +48,18 @@ def load_toy_fits(*toy_list, **kwargs): if not all(os.path.exists(get_toy_fit_path(toy_name)) for toy_name in toy_list): raise OSError("Cannot load all toys") with contextlib2.ExitStack() as toy_stack: - fit_results = [toy_stack.enter_context(pd.HDFStore(get_toy_fit_path(toy_name), mode='r'))['fit_results'] - for toy_name in toy_list] + # toy_results = [] + fit_cov_matrices = [] + fit_results = [] + for toy_name in toy_list: + toy_result = toy_stack.enter_context(pd.HDFStore(get_toy_fit_path(toy_name), mode='r')) + fit_result = toy_result['fit_results'] + fit_results.append(fit_result) + cov_matrices = [] + for row in fit_result.iterrows(): + cov_path = os.path.join('covariance', row[1]['jobid'], row[1]['fit_num']) + cov_matrices.append(toy_result[cov_path]) + fit_cov_matrices.append(cov_matrices) if not all(all(fit_result.columns == fit_results[0].columns) for fit_result in fit_results): if kwargs.get('fail_on_incompatible', True): @@ -63,6 +73,6 @@ def load_toy_fits(*toy_list, **kwargs): '_{gen}' in col and not col.startswith('N^'), '_{nominal}' in col))] merged_result.set_index(indices, inplace=True) - return merged_result + return merged_result, fit_cov_matrices # EOF -- GitLab From e36d45b04d32bbb0290d266ebb36207223ec5cda Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Mon, 4 Dec 2017 11:36:12 +0100 Subject: [PATCH 4/7] Remove fit_result load toy hdf --- analysis/fit/result.py | 125 +---------------------------------------- 1 file changed, 1 insertion(+), 124 deletions(-) diff --git a/analysis/fit/result.py b/analysis/fit/result.py index 299b482..85c7023 100644 --- a/analysis/fit/result.py +++ b/analysis/fit/result.py @@ -9,15 +9,12 @@ from collections import OrderedDict import copy -import os -import warnings import numpy as np -import pandas as pd from analysis.utils.config import load_config, write_config, ConfigError from analysis.utils.root import iterate_roocollection -from analysis.utils.paths import get_fit_result_path, get_toy_fit_path +from analysis.utils.paths import get_fit_result_path _SUFFIXES = ('', '_err_hesse', '_err_plus', '_err_minus') @@ -169,124 +166,6 @@ class FitResult(object): raise KeyError("Missing keys in input file -> {}".format(','.join(error.missing_keys))) return self - @ensure_non_initialized - def from_toy_hdf(self, name, iloc=None): # TODO: which path func? - """Initialize from a hdf file. - - Arguments: - name (str): name of the hdf file - iloc (int): The Index based LOCation of the fit result, as - in :py:meth:`pd.DataFrame.iloc` - - Return: - self - - """ - result = {} - # obtain dataframe - with pd.HDFStore(get_toy_fit_path(name), mode='r') as store: # TODO: is toy fit path right? - toy_results = store['fit_results'] - fit_results = toy_results['fit_results'] - - # extract the right fit - if iloc: - fit_result = fit_results.iloc[iloc] - elif False: - pass - else: - raise ValueError("No matching key is given for the toy fit result.") - - fit_result = dict(fit_result) - - # extract jobid, fitnum - jobid = fit_result.pop('jobid') - fitnum = fit_result.pop('fitnum') - - # obtain covariance matrix - cov_matrix_path = os.path.join('covariance', str(jobid), str(fitnum)) - with pd.HDFStore(get_toy_fit_path(name), mode='r') as store: - cov_matrix = store[cov_matrix_path] - - # get toy specific values - seed = fit_result.pop('seed') - fit_strategy = fit_result.pop('fit_strategy') - model_name = fit_result.pop('model_name') - - # store results - self.from_plain_dict(plain_dict=fit_result, skip_cov=True) # skip as we have matrix, not A1 - - # store covariance matrix - self._result['covariance-matrix']['matrix'] = cov_matrix - - # store toy specific values - self._result['jobid'] = jobid - self._result['fitnum'] = fitnum - self._result['seed'] = seed - self._result['fit_strategy'] = fit_strategy - self._result['model_name'] = model_name - - self._result = result - - return self - - @ensure_non_initialized - def from_plain_dict(self, plain_dict, skip_cov=True): - """Initialize from a dict *plain_dict*, inverse to :py:func:`~FitResult.to_plain_dict` - - Arguments: - plain_dict (dict): Dict containing the fit result as created by - :py:func:`~FitResult.to_plain_dict` - skip_cov (bool): If True, the covariance matrix is not stored. - - Return: - self - - """ - # TODO: dirty extraction: relies on order: parameters, consts, other stuff - - result = {} - plain_dict = plain_dict.copy() - - # extract the parameters - # TODO: better extraction method? - possible_params = list(plain_dict.keys()) - fit_parameters = OrderedDict() - while (len(possible_params) >= len(_SUFFIXES) and - all(isinstance(p, str) for p in possible_params[:len(_SUFFIXES)])): - if len(_SUFFIXES) == 0: - break - param_name = possible_params[0][:max(len(possible_params[0]) - len(_SUFFIXES[0]), 0)] - if all((param_name + suf == p for p, suf in zip(possible_params, _SUFFIXES))): - fit_parameters[param_name] = (plain_dict.pop(name) for name in possible_params[:len(_SUFFIXES)]) - possible_params = possible_params[len(_SUFFIXES):] - result['fit-parameters'] = fit_parameters - - n_const_params = result.index('status_migrad') # TODO: better limit the const params - result['const-parameters'] = OrderedDict(plain_dict.popitem(last=False) - for _ in range(n_const_params)) - - # TODO: parameter-initial missing (not stored in hdf currently) - - fit_status = {'MIGRAD', plain_dict.pop('status_migrad'), - 'HESSE', plain_dict.pop('status_hesse'), - 'MINOS', plain_dict.pop('status_minos')} - result['status'] = fit_status - result['edm'] = plain_dict.pop('edm') - result['covariance-matrix'] = {'quality': plain_dict.pop('cov_quality')} - if skip_cov: - plain_dict.pop('cov_matrix', None) # to see whats left over - else: - # TODO: convert cov_matrix as it is given as A1 in to_plain_dict - result['covariance-matrix']['matrix'] = plain_dict['cov_matrix'] - - if plain_dict: # TODO: warning? or even assert? - warnings.warn('Possible information loss! The following information ' - 'was contained in the HDF but is not in the FitResult: ' - '{}'.format(plain_dict)) - - self._result = result - return self - @ensure_initialized def to_yaml(self): """Convert fit result to YAML format. @@ -335,11 +214,9 @@ class FitResult(object): pandas.DataFrame """ - # do NOT change order below pandas_dict = OrderedDict(((param_name + suffix, val) for param_name, param in self._result['fit-parameters'].items() for val, suffix in zip(param, _SUFFIXES))) - # do NOT change order below pandas_dict.update(OrderedDict((param_name, val) for param_name, val in self._result['const-parameters'].items())) pandas_dict['status_migrad'] = self._result['status'].get('MIGRAD', -1) -- GitLab From ed39ba0a39d50fb73292e6447cd6209eb7d9a027 Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Tue, 5 Dec 2017 19:28:15 +0100 Subject: [PATCH 5/7] Fix wrong parameter or doc --- analysis/toys/tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/analysis/toys/tools.py b/analysis/toys/tools.py index f3dd9f1..840fa05 100644 --- a/analysis/toys/tools.py +++ b/analysis/toys/tools.py @@ -19,7 +19,7 @@ from analysis.utils.logging_color import get_logger _logger = get_logger('analysis.toys.tools') -def load_toy_fits(*toy_list, **kwargs): +def load_toy_fits(*toys, **kwargs): """Load toy fit results. If several files are given, all the tables are merged. @@ -29,7 +29,7 @@ def load_toy_fits(*toy_list, **kwargs): is returned. Arguments: - *toy_list (list): List of toy names to load. + *toys (str): Toy names to load. **kwargs (dict): Extra options: + `index` (bool, optional): Index the data frame? Defaults to `True`. @@ -46,13 +46,13 @@ def load_toy_fits(*toy_list, **kwargs): """ # Check that toys exist - if not all(os.path.exists(get_toy_fit_path(toy_name)) for toy_name in toy_list): + if not all(os.path.exists(get_toy_fit_path(toy_name)) for toy_name in toys): raise OSError("Cannot load all toys") with contextlib2.ExitStack() as toy_stack: # toy_results = [] fit_cov_matrices = [] fit_results = [] - for toy_name in toy_list: + for toy_name in toys: toy_result = toy_stack.enter_context(pd.HDFStore(get_toy_fit_path(toy_name), mode='r')) fit_result = toy_result['fit_results'] fit_results.append(fit_result) -- GitLab From 699df286c8fda7fc71e35ba95b15513d17573723 Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Tue, 5 Dec 2017 19:36:43 +0100 Subject: [PATCH 6/7] Return results as dict --- analysis/toys/tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/analysis/toys/tools.py b/analysis/toys/tools.py index 840fa05..40c59bc 100644 --- a/analysis/toys/tools.py +++ b/analysis/toys/tools.py @@ -48,6 +48,7 @@ def load_toy_fits(*toys, **kwargs): # Check that toys exist if not all(os.path.exists(get_toy_fit_path(toy_name)) for toy_name in toys): raise OSError("Cannot load all toys") + results = {} with contextlib2.ExitStack() as toy_stack: # toy_results = [] fit_cov_matrices = [] @@ -74,6 +75,8 @@ def load_toy_fits(*toys, **kwargs): '_{gen}' in col and not col.startswith('N^'), '_{nominal}' in col))] merged_result.set_index(indices, inplace=True) - return merged_result, fit_cov_matrices + results['fit_result'] = merged_result + results['cov_matrix'] = fit_cov_matrices + return results # EOF -- GitLab From daa8069e837eee86948095de70702bea2dc96baa Mon Sep 17 00:00:00 2001 From: Jonas Eschle 'Mayou36 <mayou36@jonas.eschle.com> Date: Tue, 5 Dec 2017 19:41:12 +0100 Subject: [PATCH 7/7] Changed wording to plural --- analysis/toys/tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/toys/tools.py b/analysis/toys/tools.py index 40c59bc..a866f4e 100644 --- a/analysis/toys/tools.py +++ b/analysis/toys/tools.py @@ -75,8 +75,8 @@ def load_toy_fits(*toys, **kwargs): '_{gen}' in col and not col.startswith('N^'), '_{nominal}' in col))] merged_result.set_index(indices, inplace=True) - results['fit_result'] = merged_result - results['cov_matrix'] = fit_cov_matrices + results['fit_results'] = merged_result + results['cov_matrices'] = fit_cov_matrices return results # EOF -- GitLab