From 6be194c82b44677d32bd9545f1d3f1d92677c32d Mon Sep 17 00:00:00 2001
From: Carla Marin <cmarin@ecm.ub.edu>
Date: Thu, 18 Jan 2018 21:17:16 +0100
Subject: [PATCH 1/4] debug syst_toys to run fixed_params toys

---
 analysis/toys/syst_toys.py   | 10 +++++-----
 analysis/toys/systematics.py | 27 +++++++++++++++------------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/analysis/toys/syst_toys.py b/analysis/toys/syst_toys.py
index 7877665..0939fea 100644
--- a/analysis/toys/syst_toys.py
+++ b/analysis/toys/syst_toys.py
@@ -111,7 +111,7 @@ def run(config_files, link_from, verbose):
     # Set seed
     job_id = get_job_id()
     # Start looping
-    fit_results = defaultdict(list)
+    data_res = []
     logger.info("Starting sampling-fit loop (print frequency is 20)")
     initial_mem = memory_usage()
     initial_time = default_timer()
@@ -135,13 +135,13 @@ def run(config_files, link_from, verbose):
         except ValueError:
             raise RuntimeError()
         except Exception:
-            logger.exception()
+            #logger.exception()
             raise RuntimeError()  # TODO: provide more information?
         # Now results are in fit_parameters
         result = FitResult.from_roofit(fit_result).to_plain_dict()
         result['fitnum'] = fit_num
         result['seed'] = seed
-        fit_results[fit_num].append(result)
+        data_res.append(result)
         _root.destruct_object(fit_result)
         _root.destruct_object(dataset)
         logger.debug("Cleaning up")
@@ -149,7 +149,7 @@ def run(config_files, link_from, verbose):
     logger.info("--> Memory leakage: %.2f MB/sample-fit", (memory_usage() - initial_mem)/nfits)
     logger.info("--> Spent %.0f ms/sample-fit", (default_timer() - initial_time)*1000.0/nfits)
     logger.info("Saving to disk")
-    data_frame = pd.DataFrame(fit_results)
+    data_frame = pd.DataFrame(data_res)
     fit_result_frame = pd.concat([data_frame,
                                   pd.concat([pd.DataFrame({'jobid': [job_id]})]
                                             * data_frame.shape[0]).reset_index(drop=True)],
@@ -163,7 +163,7 @@ def run(config_files, link_from, verbose):
                 # First fit results
                 hdf_file.append('fit_results', fit_result_frame)
                 # Generator info
-                hdf_file.append('input_values', pd.DataFrame([systematic.get_input_values()]))
+                hdf_file.append('input_values', pd.DataFrame(systematic.get_input_values()))
             logger.info("Written output to %s", toy_fit_file)
             if 'link-from' in config:
                 logger.info("Linked to %s", config['link-from'])
diff --git a/analysis/toys/systematics.py b/analysis/toys/systematics.py
index 5d80272..4e8776c 100644
--- a/analysis/toys/systematics.py
+++ b/analysis/toys/systematics.py
@@ -117,7 +117,7 @@ class SystematicToys(object):
         """
         # TODO: Add weights?
         if randomize:
-            self.logger("Applying randomization")
+            logger.debug("Applying randomization")
             self.randomize()
         obs = list_to_rooargset(self._model.get_observables())
         datasets_to_merge = []
@@ -175,8 +175,8 @@ class SystematicToys(object):
 
         """
         if self._input_values is None:
-            self._input_values = {var.GetName(): [var.getVal()]
-                                  for var in self._model.get_gen_parameters() + self._model.get_yield_vars()}
+            self._input_values = {"{}_gen".format(var.GetName()): [var.getVal()]
+                                  for var in list(self._model.get_gen_parameters()) + self._model.get_yield_vars()}
         return self._input_values
 
 
@@ -239,19 +239,21 @@ class FixedParamsSyst(SystematicToys):
             fit_result = FitResult.from_yaml_file(result_config['result'])
             self._param_translation.update(result_config['param_names'])
             cov_matrices.append(fit_result.get_covariance_matrix(self._param_translation.keys()))
-            central_values.append(np.diag([float(fit_result.get_fit_parameter(param)[0])
-                                           for param in self._param_translation.keys()]))
+            central_values.extend([float(fit_result.get_fit_parameter(param)[0])
+                                           for param in self._param_translation.keys()])
         # Check that there is a correspondence between the fit result and parameters in the generation PDF
         self._cov_matrix = make_block(*cov_matrices)
-        self._central_values = make_block(*central_values)
+        self._central_values = central_values
         self._pdf_index = {}
         for fit_param in self._param_translation.values():
             found = False
-            for pdf_num, pdf in enumerate(self._gen_pdfs):
-                if fit_param in [var.GetName() for var in iterate_roocollection(pdf.getVariables())]:
-                    self._pdf_index[fit_param] = pdf_num
-                    found = True
-                    break
+            for label, pdf_list in self._gen_pdfs.items():
+                for pdf_num, pdf in enumerate(pdf_list):
+                    if fit_param in [var.GetName() for var in iterate_roocollection(pdf.getVariables())]:
+                        self._pdf_index[fit_param] = (label, pdf_num)
+                        found = True
+                        break
+                if found: break
             if not found:
                 raise RuntimeError("Cannot find parameter {} in the physics model".format(fit_param))
 
@@ -267,7 +269,8 @@ class FixedParamsSyst(SystematicToys):
         """
         random_values = np.random.multivariate_normal(self._central_values, self._cov_matrix)
         for param_num, (param_name, pdf_index) in enumerate(self._pdf_index.items()):
-            self._gen_pdfs[pdf_index].getVariables[param_name].setVal(random_values[param_num])
+            pdf_label, pdf_num = pdf_index
+            self._gen_pdfs[pdf_label][pdf_num].getVariables()[param_name].setVal(random_values[param_num])
         return len(random_values)
 
 
-- 
GitLab


From 84086c864b239e5a7c822be45a4ade926ed0277a Mon Sep 17 00:00:00 2001
From: Carla Marin <cmarin@ecm.ub.edu>
Date: Fri, 19 Jan 2018 08:48:03 +0100
Subject: [PATCH 2/4] save covariance matrix and minor changes

---
 analysis/toys/syst_toys.py   | 29 ++++++++++++++++++++++++++---
 analysis/toys/systematics.py |  5 +++--
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/analysis/toys/syst_toys.py b/analysis/toys/syst_toys.py
index 0939fea..59f6a74 100644
--- a/analysis/toys/syst_toys.py
+++ b/analysis/toys/syst_toys.py
@@ -13,6 +13,7 @@ These generate and fit in one go with some random variation of one or more param
 from __future__ import print_function, division, absolute_import
 
 import argparse
+import os
 from collections import defaultdict
 from timeit import default_timer
 
@@ -111,7 +112,7 @@ def run(config_files, link_from, verbose):
     # Set seed
     job_id = get_job_id()
     # Start looping
-    data_res = []
+    fit_results = defaultdict()
     logger.info("Starting sampling-fit loop (print frequency is 20)")
     initial_mem = memory_usage()
     initial_time = default_timer()
@@ -138,10 +139,13 @@ def run(config_files, link_from, verbose):
             #logger.exception()
             raise RuntimeError()  # TODO: provide more information?
         # Now results are in fit_parameters
-        result = FitResult.from_roofit(fit_result).to_plain_dict()
+        result_roofit = FitResult.from_roofit(fit_result)
+        result = result_roofit.to_plain_dict()
+        result['cov_matrix'] = result_roofit.get_covariance_matrix()
+        result['param_names'] = result_roofit.get_fit_parameters().keys()
         result['fitnum'] = fit_num
         result['seed'] = seed
-        data_res.append(result)
+        fit_results[fit_num] = result
         _root.destruct_object(fit_result)
         _root.destruct_object(dataset)
         logger.debug("Cleaning up")
@@ -149,6 +153,20 @@ def run(config_files, link_from, verbose):
     logger.info("--> Memory leakage: %.2f MB/sample-fit", (memory_usage() - initial_mem)/nfits)
     logger.info("--> Spent %.0f ms/sample-fit", (default_timer() - initial_time)*1000.0/nfits)
     logger.info("Saving to disk")
+    data_res = []
+    cov_matrices = {}
+    # Get covariance matrices
+    for fit_num, fit_res in fit_results.items():
+        fit_res = fit_res.copy()
+        fit_res['model_name'] = model_name # needed for indexing
+        fit_res['fit_strategy'] = fit_strategy
+        
+        cov_folder = os.path.join(str(job_id), str(fit_res['fitnum']))
+        param_names = fit_res.pop('param_names')
+        cov_matrices[cov_folder] = pd.DataFrame(fit_res.pop('cov_matrix'),
+                                                index=param_names,
+                                                columns=param_names)
+        data_res.append(fit_res)
     data_frame = pd.DataFrame(data_res)
     fit_result_frame = pd.concat([data_frame,
                                   pd.concat([pd.DataFrame({'jobid': [job_id]})]
@@ -162,8 +180,13 @@ def run(config_files, link_from, verbose):
             with modify_hdf(toy_fit_file) as hdf_file:
                 # First fit results
                 hdf_file.append('fit_results', fit_result_frame)
+                # Save covarinance matrix under 'covariance/jobid/fitnum
+                for cov_folder, cov_matrix in cov_matrices.items():
+                    cov_path = os.path.join('covariance', cov_folder)
+                    hdf_file.append(cov_path, cov_matrix)
                 # Generator info
                 hdf_file.append('input_values', pd.DataFrame(systematic.get_input_values()))
+                
             logger.info("Written output to %s", toy_fit_file)
             if 'link-from' in config:
                 logger.info("Linked to %s", config['link-from'])
diff --git a/analysis/toys/systematics.py b/analysis/toys/systematics.py
index 4e8776c..dc58a58 100644
--- a/analysis/toys/systematics.py
+++ b/analysis/toys/systematics.py
@@ -243,7 +243,7 @@ class FixedParamsSyst(SystematicToys):
                                            for param in self._param_translation.keys()])
         # Check that there is a correspondence between the fit result and parameters in the generation PDF
         self._cov_matrix = make_block(*cov_matrices)
-        self._central_values = central_values
+        self._central_values = np.array(central_values)
         self._pdf_index = {}
         for fit_param in self._param_translation.values():
             found = False
@@ -253,7 +253,8 @@ class FixedParamsSyst(SystematicToys):
                         self._pdf_index[fit_param] = (label, pdf_num)
                         found = True
                         break
-                if found: break
+                if found:
+                    break
             if not found:
                 raise RuntimeError("Cannot find parameter {} in the physics model".format(fit_param))
 
-- 
GitLab


From 33e6861c2607f52fcbf9b7306dd2f3438b6540a8 Mon Sep 17 00:00:00 2001
From: Carla Marin <cmarin@ecm.ub.edu>
Date: Fri, 19 Jan 2018 10:39:59 +0100
Subject: [PATCH 3/4] rm _param_translation from self

---
 analysis/toys/systematics.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/analysis/toys/systematics.py b/analysis/toys/systematics.py
index dc58a58..6f3bdf6 100644
--- a/analysis/toys/systematics.py
+++ b/analysis/toys/systematics.py
@@ -230,22 +230,22 @@ class FixedParamsSyst(SystematicToys):
         super(FixedParamsSyst, self).__init__(model, config=config, acceptance=acceptance)
         cov_matrices = []
         central_values = []
-        self._param_translation = OrderedDict()
+        param_translation = OrderedDict()
         # Load fit results and their covariance matrices
         syst = config['params']
         if not isinstance(syst, (list, tuple)):
             syst = [syst]
         for result_config in syst:
             fit_result = FitResult.from_yaml_file(result_config['result'])
-            self._param_translation.update(result_config['param_names'])
-            cov_matrices.append(fit_result.get_covariance_matrix(self._param_translation.keys()))
+            param_translation.update(result_config['param_names'])
+            cov_matrices.append(fit_result.get_covariance_matrix(param_translation.keys()))
             central_values.extend([float(fit_result.get_fit_parameter(param)[0])
-                                           for param in self._param_translation.keys()])
+                                           for param in param_translation.keys()])
         # Check that there is a correspondence between the fit result and parameters in the generation PDF
         self._cov_matrix = make_block(*cov_matrices)
         self._central_values = np.array(central_values)
         self._pdf_index = {}
-        for fit_param in self._param_translation.values():
+        for fit_param in param_translation.values():
             found = False
             for label, pdf_list in self._gen_pdfs.items():
                 for pdf_num, pdf in enumerate(pdf_list):
-- 
GitLab


From 16fcdea7d5e50dc4d67e10766e0df5be5bf87ade Mon Sep 17 00:00:00 2001
From: Carla Marin <cmarin@ecm.ub.edu>
Date: Fri, 19 Jan 2018 11:02:11 +0100
Subject: [PATCH 4/4] use plain dict

---
 analysis/toys/syst_toys.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/analysis/toys/syst_toys.py b/analysis/toys/syst_toys.py
index 59f6a74..a8f79a5 100644
--- a/analysis/toys/syst_toys.py
+++ b/analysis/toys/syst_toys.py
@@ -14,7 +14,6 @@ from __future__ import print_function, division, absolute_import
 
 import argparse
 import os
-from collections import defaultdict
 from timeit import default_timer
 
 import pandas as pd
@@ -112,7 +111,7 @@ def run(config_files, link_from, verbose):
     # Set seed
     job_id = get_job_id()
     # Start looping
-    fit_results = defaultdict()
+    fit_results = {}
     logger.info("Starting sampling-fit loop (print frequency is 20)")
     initial_mem = memory_usage()
     initial_time = default_timer()
-- 
GitLab