From 7f187ccaf00ec622756f9be2fc1071110bb2bcfd Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Fri, 15 Jul 2016 15:56:34 +0200 Subject: [PATCH 01/35] specialise catalogues Start implementing `HbsmSampleCatalogue` and `VlqSampleCatalogue` holding the analysis-specific details and procedures. --- python/sample_catalogues.py | 255 ++++++++++++++++++++++++++++-------- 1 file changed, 199 insertions(+), 56 deletions(-) diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py index 9b4045c..cc81df5 100644 --- a/python/sample_catalogues.py +++ b/python/sample_catalogues.py @@ -5,17 +5,26 @@ All samples are initially provided in a txt file corresponding to one production (the file `datasets_to_download.txt` provided by Peyton). SampleCatalogue is the main class here. It takes care of: -- organizing the samples in categories: data, ttbar, signal, etc. + +- organizing the samples in categories: data, ttbar, signal, + etc. Categories are flexible (just list of files) and they should be + implemented following the `categorise_all_samples` example. - TODO attaching systematic uncertainties to the samples -- TODO attaching ChainProvider(s) to the samples +- TODO attaching ChainProvider(s) to the Variation - TODO building the VLQAnalysis/data/samples_info.dat file +Overall design: +A Sample has a list of SystematicUncertainty; the 'nominal' case is +just a SystematicUncertainty.is_nominal. +Each uncertainty has one or two Variation. +Each Variation has a ChainProvider + Different analyses might need to access different samples, or to organize them in different ways (e.g. special treatment of signal samples, systematic samples, etc.). This is implemented by specializing SampleCatalogue. -wTODO : systematic sample might need to attach chain_provider to each systematic uncertainty +TODO : implement special treatment of systematic samples. davide.gerbaudo@gmail.com Jul 2016 @@ -31,27 +40,45 @@ from VLQAnalysis import utils # catalogue = systematics.SystematicCatalogue() +#___________________________________________________________ class Sample(object): - "Holds info about a sample and its attributes." + """Holds info about a sample and its attributes. + 'full_name' is always there; the other attributes are filled as needed. 
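+
+    Example (a sketch; the container name is made up):
+        s = Sample(full_name='user.prose.410000.ttbar.DAOD_TOPQ1.e3698_s2608_r7725_p2613')
+        print s.dsid # '410000', extracted from full_name by utils.guess_id_from_name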
+ """ def __init__(self, short_name=None, full_name=None, group=None, filelist_dir=None, filelist_file=None, ds_input=None): self.short_name = short_name self.full_name = full_name - self.group = group + self._group = group self.ds_input = ds_input - self.chain_provider = None - # self.systematic_uncertainties = [catalogue.nominal] + self.systematic_uncertainties = [] if not full_name: raise ValueError("Sample must have full_name") def __str__(self): plain_attrs = ['short_name', 'full_name', 'group', 'ds_input'] + # TODO how do you want to print the uncertainties (and the chainprovider) return "Sample :"+ ', '.join("%s : '%s'" % (a, va) for a, va in [(pa, getattr(self, pa)) for pa in plain_attrs] if va) @property def dsid(self): + "No need to input this by hand, just extract it from full_name" return utils.guess_id_from_name(samplename=self.full_name) + @property + def group(self): + return self._group + + @group.setter + def group(self, value, overwrite_group=False): + "this attribute is set by SampleCatalogue so we need to check it and cache it" + if not sample._group or sample._group==value: + sample._group = group + elif overwrite_group: + sample._group = group + else: + pass + def use_all_uncertainties(self): self.systematic_uncertainties = catalogue.all_uncertainties() @@ -64,6 +91,7 @@ class Sample(object): def use_nominal_uncertainty(self): "nothing to do, this is the default" pass +#___________________________________________________________ class ChainProvider(object): "Provides access to a chain built out of files on disk/eos/pnfs" @@ -77,6 +105,7 @@ class ChainProvider(object): print cmd print "# now replace the stuff before 'eos' with 'root://eosatlas/'" +#___________________________________________________________ class SampleCatalogue(object): """Keep track of a collection of samples. @@ -91,6 +120,8 @@ class SampleCatalogue(object): - or from several files group files, in which case the filename is used as the group + In general one builds the catalogue from the group files. + TODO attach syst uncertainties to samples TODO attach @@ -99,22 +130,32 @@ class SampleCatalogue(object): self.samples = [] self.verbose = False - def add_samples_from_file(self, path, line_parser = lambda l: Sample(full_name=l.strip())): - self.samples += [line_parser(l) for l in SampleCatalogue.read_lines_from_txt(path)] - def add_samples_from_group_files(self, paths=[]): - """A wrapper so that we can read samples that have already been categorised. + """This is the usual method to populate the catalogue, from + samples that have altready been organised in groups. + The name of each group is determined from the filename Example: > catalogue.add_samples_categorised(glob.glob('path/to/files/*.txt') - The name of each group is determined from the filename """ for path in paths: group = utils.filename_without_extension(path) self.add_samples_from_file(path, line_parser=lambda l: Sample(full_name=l.strip(), group=group)) - def write_group_files(self, output_directory=None): + def add_samples_from_file(self, path, line_parser = lambda l: Sample(full_name=l.strip())): + """This should be used to populate the catalogue when you have a + new production. 
path should be the file 'datasets_to_download.txt' + generated from HtX4TopsNtuple + """ + self.samples += [line_parser(l) for l in SampleCatalogue.read_lines_from_txt(path)] + + def write_group_files(self, output_directory=None, allow_uncategorised_samples=True): + "After having called 'categorise_all_samples', you can write the categorization to group files." if self.has_uncategorised_samples: print "There are samples that do not belong to any group. The group files will not be complete." + if not allow_uncategorised_samples: + uncategorised = [s for s in self.samples if not s.group] + raise NotImplementedError("Do not know how to handle uncategorised samples:\n"+ + '\n'.join(s.full_name for s in uncategorised)) if not os.path.isdir(output_directory): raise IOError("'%s' is not a valid directory" % output_directory) samples_per_group = collections.defaultdict(list) @@ -135,17 +176,7 @@ class SampleCatalogue(object): @classmethod def categorise_all_samples(cls, samples, overwrite_group=False, verbose=False): - "try to determine the group using 'determine_group_from_name'" - for sample in samples: - group = cls.determine_group_from_name(sample) - if not sample.group or sample.group==group: - sample.group = group - elif overwrite_group: - if verbose: - print "overwriting group '%s' with '%s' for %s" % (sample.group, group, sample.full_name) - sample.group = group - else: - pass + raise NotImplementedError("This method should be implemented in the analysis-specific sub-classes") @classmethod def determine_group_from_name(cls, sample=None): @@ -153,7 +184,7 @@ class SampleCatalogue(object): This is where the analysis-specific catalogues can implement their categorisation. """ - return ('data' if 'physics_Main' in sample.full_name else + return ('data' if cls.is_data(sample) else 'ttbar' if cls.is_ttbar(sample) else 'wjets' if cls.is_wjets(sample) else 'zjets' if cls.is_zjets(sample) else @@ -162,12 +193,12 @@ class SampleCatalogue(object): 'topewk' if cls.is_topewk(sample) else 'tth' if cls.is_tth(sample) else 'fourtop' if cls.is_fourtop(sample) else - 'vlq' if cls.is_vlq(sample) else - 'uerdpp' if cls.is_uerdpp(sample) else - 'fourtopci' if cls.is_fourtopci(sample) else - 'hbsm' if cls. is_hbsm(sample) else None) + @staticmethod + def is_data(sample): + return 'physics_Main' in sample.full_name + @staticmethod def is_ttbar(sample): return any(str(dsid) in sample.full_name for dsid in range(407009, 407012+1) + range(410000, 410004+1) + [410120]) @@ -206,27 +237,6 @@ class SampleCatalogue(object): def is_fourtop(sample): return any(str(dsid) in sample.full_name for dsid in [410080]) - # perhaps the is_* below should go to VlqSampleCatalogue? 
DG-2016-07-14
-    @staticmethod
-    def is_vlq(sample):
-        return any(str(dsid) in sample.full_name for dsid in range(302468, 302519+1))
-
-    @staticmethod
-    def is_uerdpp(sample):
-        return any(str(dsid) in sample.full_name for dsid in range(302055, 302059+1))
-
-    @staticmethod
-    def is_fourtopci(sample):
-        return any(str(dsid) in sample.full_name for dsid in [302777])
-
-    @staticmethod
-    def is_hbsm(sample):
-        return any(str(dsid) in sample.full_name
-                   for dsid in range(304777, 304780+1) + range(341541, 341555+1) + [343434, 343432])
-
-
-
     @staticmethod
     def read_lines_from_txt(txt_filename):
         "parse a file dropping comment and empty lines"
@@ -266,7 +276,132 @@ class SampleCatalogue(object):
             of.write('cd - \n')
         print "To generate the file lists, open a new shell with rucio, then execute 'source %s'" % script_filename

-# class VlqSampleCatalogue(SampleCatalogue):
+#___________________________________________________________
+
+class VlqSampleCatalogue(SampleCatalogue):
+    "Catalogue with features that are specific to the VLQ analysis"
+    @classmethod
+    def determine_group_from_name(cls, sample=None):
+        """Determine the group of this sample from its name.
+
+        This is where the analysis-specific catalogues can implement their categorisation.
+        """
+        return ('vlq' if cls.is_vlq(sample) else
+                'uerdpp' if cls.is_uerdpp(sample) else
+                'fourtopci' if cls.is_fourtopci(sample) else
+                SampleCatalogue.determine_group_from_name(sample))
+
+    @classmethod
+    def categorise_samples(cls, samples, overwrite_group=False, verbose=False):
+        """This is where the samples are organised in groups.
+
+        For some groups we might need to perform extra steps; for
+        example we need to assign the 'short_name' (this was called
+        'name' in VLQ_Samples.py)
+        """
+        for sample in samples:
+            sample.group = cls.determine_group_from_name(sample)
+            if sample.group == 'vlq':
+                sample.short_name = cls.vlq_short_name(sample)
+            elif sample.group == 'uerdpp':
+                sample.short_name = cls.uerdpp_short_name(sample)
+            elif sample.group == 'fourtopci':
+                sample.short_name = '4tops_CI'
+
+    @staticmethod
+    def is_vlq(sample):
+        return any(str(dsid) in sample.full_name for dsid in range(302468, 302519+1))
+
+    @staticmethod
+    def is_uerdpp(sample):
+        return any(str(dsid) in sample.full_name for dsid in range(302055, 302059+1))
+
+    @staticmethod
+    def is_fourtopci(sample):
+        return any(str(dsid) in sample.full_name for dsid in [302777])
+
+    @classmethod
+    def vlq_short_name(cls, sample=None):
+        dsid = int(sample.dsid)
+        return ('VLQ_TT_600' if dsid==302469 else
+                'VLQ_TT_700' if dsid==302470 else
+                'VLQ_TT_750' if dsid==302471 else
+                'VLQ_TT_800' if dsid==302472 else
+                'VLQ_TT_850' if dsid==302473 else
+                'VLQ_TT_900' if dsid==302474 else
+                'VLQ_TT_950' if dsid==302475 else
+                'VLQ_TT_1000' if dsid==302476 else
+                'VLQ_TT_1050' if dsid==302477 else
+                'VLQ_TT_1100' if dsid==302478 else
+                'VLQ_TT_1150' if dsid==302479 else
+                'VLQ_TT_1200' if dsid==302480 else
+                'VLQ_TT_1300' if dsid==302481 else
+                'VLQ_TT_1400' if dsid==302482 else
+                None)
+
+    @classmethod
+    def uerdpp_short_name(cls, sample=None):
+        dsid = int(sample.dsid)
+        return ('UEDRPP_1000' if dsid==302055 else
+                'UEDRPP_1200' if dsid==302056 else
+                'UEDRPP_1400' if dsid==302057 else
+                'UEDRPP_1600' if dsid==302058 else
+                'UEDRPP_1800' if dsid==302059 else
+                None)
+
+#___________________________________________________________
+
+class HbsmSampleCatalogue(SampleCatalogue):
+    "Catalogue with features that are specific to the HBSM analysis"
+    @classmethod
+    def determine_group_from_name(cls, sample=None):
+        
"""Determine the group of this sample from its. + + This is where the analysis-specific catalogues can implement their categorisation. + """ + group = ('vlq' if cls.is_vlq(sample) else + 'uerdpp' if cls.is_uerdpp(sample) else + 'fourtopci' if cls.is_fourtopci(sample) else + SampleCatalogue.determine_group_from_name(sample)) + + @classmethod + def categorise_samples(cls, samples, overwrite_group=False, verbose=False): + """This is where the samples are organised in groups. + + For some groups we might need to perform extra steps; for + example we need to assign the 'short_name' (this was called + 'name' in VLQ_Samples.py) + """ + for sample in samples: + sample.group = cls.determine_group_from_name(sample) + if sample.group == 'hbsm': + sample.short_name = cls.hbsm_short_name(sample) + + @staticmethod + def is_hbsm(sample): + return any(str(dsid) in sample.full_name + for dsid in range(304777, 304780+1) + range(341541, 341555+1) + [343434, 343432]) + @classmethod + def hbsm_short_name(cls, sample=None): + dsid = int(sample.dsid) + return ('VLQ_TT_600' if dsid==302469 else + 'VLQ_TT_700' if dsid==302470 else + 'VLQ_TT_750' if dsid==302471 else + 'VLQ_TT_800' if dsid==302472 else + 'VLQ_TT_850' if dsid==302473 else + 'VLQ_TT_900' if dsid==302474 else + 'VLQ_TT_950' if dsid==302475 else + 'VLQ_TT_1000' if dsid==302476 else + 'VLQ_TT_1050' if dsid==302477 else + 'VLQ_TT_1100' if dsid==302478 else + 'VLQ_TT_1150' if dsid==302479 else + 'VLQ_TT_1200' if dsid==302480 else + 'VLQ_TT_1300' if dsid==302481 else + 'VLQ_TT_1400' if dsid==302482 else + None) + + +#___________________________________________________________ # @classmethod # def categorise(cls, sample): @@ -275,6 +410,7 @@ class SampleCatalogue(object): # for name in os.listdir(data_dir): # yield cls(os.path.join(data_dir, name)) # up to concrete subclass to interpret inputs +#___________________________________________________________ if __name__=='__main__': print "Testing sample catalogues" @@ -284,7 +420,7 @@ if __name__=='__main__': sc.verbose = True sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-11.txt') print "collected %d samples" % len(sc.samples) - sc.categorise_all_samples(sc.samples) + # sc.categorise_samples(sc.samples) # only for specialised catalogues # -- tested: ok # print 'samples:' @@ -302,8 +438,15 @@ if __name__=='__main__': # print '\n'.join(s.full_name for s in uncategorised_samples) # -- tested: ok (go from one list to group files and back) + # groupfiles_directory = 'VLQAnalysis/data/hbsm/groups' + # sc.write_group_files(output_directory=groupfiles_directory) + # sc2 = SampleCatalogue() + # sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt')) + # print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) + + # -- ongoing: groupfiles_directory = 'VLQAnalysis/data/hbsm/groups' - sc.write_group_files(output_directory=groupfiles_directory) - sc2 = SampleCatalogue() - sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt')) - print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) + sc_hbsm = HbsmSampleCatalogue() + sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm/groups/*.txt')) + + print "%d samples from group files" % (len(sc_hbsm.samples)) -- GitLab From e4496c27046f04fc598ce957e57ab2747c202132 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Fri, 15 Jul 2016 17:56:37 +0200 Subject: [PATCH 02/35] 
move up --- python/{samples => }/systematics.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/{samples => }/systematics.py (100%) diff --git a/python/samples/systematics.py b/python/systematics.py similarity index 100% rename from python/samples/systematics.py rename to python/systematics.py -- GitLab From dbf25b82d6681c647cafa0c70391b2b4a67f1df4 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Fri, 15 Jul 2016 18:56:48 +0200 Subject: [PATCH 03/35] version with systematic uncertainties can build collection of samples with uncertainties. Might need to move some code to the base catalogue from the hbsm one. --- python/sample_catalogues.py | 137 +++++++++++++++++++++++++++++------- 1 file changed, 110 insertions(+), 27 deletions(-) diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py index cc81df5..87ccb18 100644 --- a/python/sample_catalogues.py +++ b/python/sample_catalogues.py @@ -8,7 +8,7 @@ SampleCatalogue is the main class here. It takes care of: - organizing the samples in categories: data, ttbar, signal, etc. Categories are flexible (just list of files) and they should be - implemented following the `categorise_all_samples` example. + implemented following the `categorise_samples` example. - TODO attaching systematic uncertainties to the samples - TODO attaching ChainProvider(s) to the Variation - TODO building the VLQAnalysis/data/samples_info.dat file @@ -30,14 +30,15 @@ davide.gerbaudo@gmail.com Jul 2016 """ +import copy import glob import os import collections from VLQAnalysis import utils -# import systematics # TODO -# catalogue = systematics.SystematicCatalogue() +import systematics # perhaps the catalogue should be instantiated elsewhere (e.g. if it will become a specialised catalogue?) +catalogue = systematics.SystematicCatalogue() #___________________________________________________________ @@ -79,6 +80,20 @@ class Sample(object): else: pass + # @property + # def group(self): + # return self._group + + # @group.setter + # def group(self, value, overwrite_group=False): + # "this attribute is set by SampleCatalogue so we need to check it and cache it" + # if not sample._group or sample._group==value: + # sample._group = group + # elif overwrite_group: + # sample._group = group + # else: + # pass + def use_all_uncertainties(self): self.systematic_uncertainties = catalogue.all_uncertainties() @@ -113,7 +128,7 @@ class SampleCatalogue(object): Samples can be added to the catalogue in two ways: - either from a single file, in which case they will not have a - 'group' until `categorise_all()` is called. This is typically + 'group' until `categorise_samples()` is called. This is typically used when we move to a new production and all the files are just listed in a file. @@ -139,17 +154,19 @@ class SampleCatalogue(object): """ for path in paths: group = utils.filename_without_extension(path) - self.add_samples_from_file(path, line_parser=lambda l: Sample(full_name=l.strip(), group=group)) + self.add_samples_from_file(path, group=group) - def add_samples_from_file(self, path, line_parser = lambda l: Sample(full_name=l.strip())): + def add_samples_from_file(self, path, group=None): """This should be used to populate the catalogue when you have a new production. path should be the file 'datasets_to_download.txt' - generated from HtX4TopsNtuple + generated from HtX4TopsNtuple. 
""" - self.samples += [line_parser(l) for l in SampleCatalogue.read_lines_from_txt(path)] + self.samples += [Sample(full_name=l, group=group) for l in SampleCatalogue.read_lines_from_txt(path)] def write_group_files(self, output_directory=None, allow_uncategorised_samples=True): - "After having called 'categorise_all_samples', you can write the categorization to group files." + """After having called 'categorise_samples', you can write the samples organised in group files. + Alternatively, you can also just write your group files by hand. + """ if self.has_uncategorised_samples: print "There are samples that do not belong to any group. The group files will not be complete." if not allow_uncategorised_samples: @@ -169,11 +186,26 @@ class SampleCatalogue(object): output_file.write('\n'.join(s.full_name for s in samples)) if self.verbose: print "written %s" % filename + @classmethod + def categorise_samples(cls, samples): + raise NotImplementedError("This operation depends on the analysis," + " and it is implemented only in the specialised catalogues") + @classmethod + def add_systematic_variations(cls, samples): + raise NotImplementedError("This operation depends on the analysis," + " and it is implemented only in the specialised catalogues") @property def has_uncategorised_samples(self): return any(not s.group for s in self.samples) + @property + def groups(self): + return sorted(list(set(s.group for s in self.samples if s.group))) + + def samples_from_group(self, group=''): + return [s for s in self.samples if s.group==group] + @classmethod def categorise_all_samples(cls, samples, overwrite_group=False, verbose=False): raise NotImplementedError("This method should be implemented in the analysis-specific sub-classes") @@ -320,7 +352,7 @@ class VlqSampleCatalogue(SampleCatalogue): def is_fourtopci(sample): return any(str(dsid) in sample.full_name for dsid in [302777]) - @classmethod + @staticmethod def vlq_short_name(cls, sample=None): dsid = int(sample.dsid) return ('VLQ_TT_600' if dsid==302469 else @@ -339,7 +371,7 @@ class VlqSampleCatalogue(SampleCatalogue): 'VLQ_TT_1400' if dsid==302482 else None) - @classmethod + @staticmethod def uerdpp_short_name(cls, sample=None): dsid = int(sample.dsid) return ('UEDRPP_1000' if dsid==302055 else @@ -400,26 +432,73 @@ class HbsmSampleCatalogue(SampleCatalogue): 'VLQ_TT_1400' if dsid==302482 else None) + @classmethod + def add_systematic_variations(cls, samples=None, verbose=False): + """Here we might need to add/drop samples, so we will just + re-build the list dealing with the groups one at the time + """ + updated_samples = [] + samples_per_group = collections.defaultdict(list) + for sample in samples: + samples_per_group[sample.group].append(sample) + for group, samples in samples_per_group.iteritems(): + if group=='data': + updated_samples += samples + elif group=='ttbar': + if verbose: + print 'adding ttbar systematics' + updated_samples += cls.add_ttbar_systematics(samples) + else: + if verbose: + print 'adding other systematics' + updated_samples += cls.add_generic_systematics(samples) + # do we need to do anything special for the signals? 
+        return updated_samples
+
+    @staticmethod
+    def add_ttbar_systematics(ttbar_samples):
+        """Take a list of samples and provide a new list containing
+        samples with syst uncertainties (and additional samples if
+        needed when splitting in hf).
+        """
+        updated_samples = []
+        hf_splitted = True # should it be configurable? in this case we need to process the samples n times
+        use_ht_slices = True # should it be configurable? in this case we need to process more samples
+        ht_sliced_dsids = [410000, 407009, 407010, 407011, 407012] # low, 1, 2, 3, met
+        ht_sliced_samples = [s for s in ttbar_samples if int(s.dsid) in ht_sliced_dsids]
+        ht_inclusive_samples = [s for s in ht_sliced_samples if int(s.dsid)==410000] # 410000 is the ht-low/inclusive slice
+
+        if hf_splitted: # this only implements the 'simple' splitting of VLQ_Samples.py
+            samples_to_split_in_hf = ht_sliced_samples if use_ht_slices else ht_inclusive_samples
+            hf_slices = ['light', 'bb', 'cc']
+            for hf_slice in hf_slices: # need to make a copy of the samples b/c they will be processed 3 times
+                samples_this_slice = [copy.deepcopy(s) for s in samples_to_split_in_hf]
+                for s in samples_this_slice:
+                    s.short_name = 'ttbar'+hf_slice
+                updated_samples += samples_this_slice
+        elif use_ht_slices:
+            updated_samples += ht_sliced_samples
+        else:
+            raise NotImplementedError("add_ttbar_systematics not implemented w/out slices, see VLQ_Samples.py")
+        for s in updated_samples: # TODO check with Loic+Mirko that this is always needed
+            s.use_all_uncertainties()
+        return updated_samples

+    @staticmethod
+    def add_generic_systematics(samples):
+        "Toggle on the weight and object systematic variations"
+        for s in samples:
+            s.use_all_uncertainties()
+        return samples

 if __name__=='__main__':
     print "Testing sample catalogues"

-    print "build catalogue from Peyton's file:"
-    sc = SampleCatalogue()
-    sc.verbose = True
-    sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-11.txt')
-    print "collected %d samples" % len(sc.samples)
+    # print "build catalogue from Peyton's file:"
+    # sc = SampleCatalogue()
+    # sc.verbose = True
+    # sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-11.txt')
+    # print "collected %d samples" % len(sc.samples)
     # sc.categorise_samples(sc.samples) # only for specialised catalogues

     # -- tested: ok
@@ -444,9 +523,13 @@ if __name__=='__main__':
     #     sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt'))
     #     print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples))

-    # -- ongoing:
+    # -- tested: ok (also the ttbar ht + hf splitting)
     groupfiles_directory = 'VLQAnalysis/data/hbsm/groups'
     sc_hbsm = HbsmSampleCatalogue()
     sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm/groups/*.txt'))
     print "%d samples from group files" % (len(sc_hbsm.samples))
+    sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples)
+    print "%d samples after syst variations" % (len(sc_hbsm.samples))
+    ttbar_samples = sc_hbsm.samples_from_group('ttbar')
+    for s in ttbar_samples:
+        print s.short_name,' ',s.full_name
-- 
GitLab

From 5cfad7a661649a95f0d196cd5476269ea36ac71d Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Wed, 20 Jul 2016 14:03:12 +0200
Subject: [PATCH 04/35] add Variation.filelist

With some protection on setter/getter so that we know how to handle
missing filelists from the catalogue.

---
 python/systematics.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/systematics.py b/python/systematics.py
index fd18a96..0e806ca 100644
--- a/python/systematics.py
+++ b/python/systematics.py
@@ -15,12 +15,22 @@ class Variation(object):
         self.input_tree = input_tree
         self.is_weight_variation = False
         self.is_object_variation = False
+        self._filelist = None
     @property
     def name(self):
         if self.input_tree.endswith(Variation.treename_suffix):
             return self.input_tree[:-len(Variation.treename_suffix)]
         else:
             raise ValueError("Variation: cannot interpret treename '%s' as a variation name" % self.input_tree)
+    @property
+    def filelist(self):
+        if not self._filelist:
+            raise IOError("missing input data for '%s'\nPlease call SampleCatalogue.add_filelists()" % self.input_tree)
+        else:
+            return self._filelist
+    @filelist.setter
+    def filelist(self, value):
+        self._filelist = value

 class WeightVariation(Variation):
     "A variation that only changes the event weight, but not the event selection"
-- 
GitLab

From f05e9b843b397c26b96bfccff791da5aca0091e8 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Wed, 20 Jul 2016 16:43:35 +0200
Subject: [PATCH 05/35] working version of the various InputDataInterface

Along with the site/support specific classes:
- LocalDiskInterface
- EosUserInterface
- RseInterface
- RucioEosCernInterface
- RucioPnfsIfaeInterface
- At3ScratchDiskInterface

---
 python/sample_catalogues.py | 275 ++++++++++++++++++++++++++++++------
 1 file changed, 235 insertions(+), 40 deletions(-)

diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 87ccb18..e3f39c0 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -9,15 +9,16 @@ SampleCatalogue is the main class here. It takes care of:
 - organizing the samples in categories: data, ttbar, signal,
   etc. Categories are flexible (just list of files) and they should be
   implemented following the `categorise_samples` example.
-- TODO attaching systematic uncertainties to the samples
-- TODO attaching ChainProvider(s) to the Variation
+- attaching systematic uncertainties to the samples
+- attaching ChainProvider(s) to the Variation
 - TODO building the VLQAnalysis/data/samples_info.dat file

 Overall design:
 A Sample has a list of SystematicUncertainty; the 'nominal' case is
 just a SystematicUncertainty.is_nominal.
 Each uncertainty has one or two Variation.
-Each Variation has a ChainProvider
+Each Variation has access to the input chain through a filelist
+generated by InputDataInterface

 Different analyses might need to access different samples, or to
 organize them in different ways (e.g. special treatment of signal
 samples, systematic samples, etc.). This is implemented by
 specializing SampleCatalogue.

 TODO : implement special treatment of systematic samples.
+TODO : speedup the generation of filelists? 
(rucio can take hrs w/out + multiprocessing, local disk is fine) davide.gerbaudo@gmail.com Jul 2016 @@ -33,6 +36,7 @@ Jul 2016 import copy import glob import os +import re import collections from VLQAnalysis import utils @@ -80,20 +84,6 @@ class Sample(object): else: pass - # @property - # def group(self): - # return self._group - - # @group.setter - # def group(self, value, overwrite_group=False): - # "this attribute is set by SampleCatalogue so we need to check it and cache it" - # if not sample._group or sample._group==value: - # sample._group = group - # elif overwrite_group: - # sample._group = group - # else: - # pass - def use_all_uncertainties(self): self.systematic_uncertainties = catalogue.all_uncertainties() @@ -108,20 +98,6 @@ class Sample(object): pass #___________________________________________________________ -class ChainProvider(object): - "Provides access to a chain built out of files on disk/eos/pnfs" - def __init__(self, filelist_dir=None, filelist_file=None): - if filelist_file and not os.path.exists(self.filelist_file): - print "missing filelist %s" %self.filelist_file - self.commands_to_build_list() - def commands_to_build_list(self, rse='CERN-PROD_SCRATCHDISK'): - cmd = "rucio list-file-replicas --rse %s %s | grep %s | awk '{print $12}'" % (rse, self.full_name, rse) - print "# open a shell with rucio, then" - print cmd - print "# now replace the stuff before 'eos' with 'root://eosatlas/'" - -#___________________________________________________________ - class SampleCatalogue(object): """Keep track of a collection of samples. @@ -194,6 +170,20 @@ class SampleCatalogue(object): def add_systematic_variations(cls, samples): raise NotImplementedError("This operation depends on the analysis," " and it is implemented only in the specialised catalogues") + def add_filelists(self, samples=None, input_interface=None): + "Attach a filelist to each one of the input samples x variations" + samples = self.samples if not samples else samples + print "About the create filelists for %d samples; this might take some time." % len(samples) + for sample in samples: + for uncertainty in sample.systematic_uncertainties: + for variation in uncertainty.variations: + variation.filelist = input_interface.generate_filelist(sample.full_name) + # note to self: here sample knows about the + # container name, and variation knows about the + # treename. It assumes that the object variation + # trees are in the same file as the nominal one. + # I might need to revise this when we start using + # systematic samples? @property def has_uncategorised_samples(self): @@ -282,11 +272,12 @@ class SampleCatalogue(object): def write_script_to_generate_rucio_eos_lists(self, script_filename='generate_eos_filelists.sh', output_directory='./', rse='CERN-PROD_SCRATCHDISK'): - """TODO this should probably go in EosInputChain. + """Obsolete, please use SampleCatalogue.add_filelists I cannot execute the rucio commands in the same shell where I run python, so I have to write them out to a file. """ + raise NotImplementedError("Obsolete, please use SampleCatalogue.add_filelists") tmp_samplelist = '/tmp/samples.txt' with open(script_filename, 'w') as of: of.write('#!/bin/bash\n') @@ -491,6 +482,190 @@ class HbsmSampleCatalogue(SampleCatalogue): s.use_all_uncertainties() return samples +#___________________________________________________________ + +class InputDataInterface(object): + """Base class defining how we access input data (through filelists). 
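+
+    A minimal usage sketch (the directory and container names here are made up):
+        interface = LocalDiskInterface(filelist_dir='filelists', base_input_dir='/data/ntuples')
+        filelist_path = interface.filelist('user.prose.some.container')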
+
+    In general one should just call 'filelist()'; some interfaces
+    (e.g. disk) will automatically generate it if it's not there;
+    others (e.g. eos, rse) will tell you how to generate it. The
+    second behaviour is motivated by the fact that the generation
+    might take time (e.g. eos) or special setup (e.g. rucio), so it
+    might be better to do this asynchronously, once for all samples in
+    a separate shell.
+
+    For an example of the second case, see
+    'SampleCatalogue.add_filelists'.
+
+    For tests, one can also call
+    'InputDataInterface.generate_filelist' for a single sample (rather
+    than through the catalogue).
+    """
+    def __init__(self, filelist_dir):
+        self.filelist_dir = utils.mkdir_if_needed(filelist_dir)
+    def generate_filelist(self, container):
+        raise NotImplementedError("Should be implemented in specialised classes")
+    def filelist(self, container):
+        raise NotImplementedError("Should be implemented in specialised classes")
+#___________________________________________________________
+
+class LocalDiskInterface(InputDataInterface):
+    """Data on disk that can be accessed through simple os.path.
+    If there is no filelist just generate it.
+    It assumes that there is one sub-directory for each container.
+    """
+    def __init__(self, filelist_dir, base_input_dir):
+        super(LocalDiskInterface, self).__init__(filelist_dir)
+        self.base_input_dir = base_input_dir
+
+    def generate_filelist(self, container):
+        container = container.strip('/')
+        filelist_path = os.path.join(self.filelist_dir, container+'.txt')
+        if not os.path.exists(filelist_path):
+            with open(filelist_path, 'w') as filelist_file:
+                filenames = [f for f in os.listdir(os.path.join(self.base_input_dir, container))
+                             if '.root' in f]
+                filenames = sorted(filenames)
+                filenames = [os.path.abspath(os.path.join(self.base_input_dir, container, f))
+                             for f in filenames]
+                filelist_file.write('\n'.join(filenames)+'\n')
+        return filelist_path
+
+    def filelist(self, container):
+        filelist_path = os.path.join(self.filelist_dir, container.strip('/')+'.txt')
+        if not os.path.exists(filelist_path):
+            self.generate_filelist(container)
+        return filelist_path
+#___________________________________________________________
+
+class EosUserInterface(InputDataInterface):
+    """Data on eos accessed through 'eos ls' (not via rucio).
+    When the filelist is missing, raise IOError.
+    Prefer not to generate them under the hood (can take time, better
+    if the user does it explicitly).
+    """
+    def __init__(self, filelist_dir, base_input_dir):
+        super(EosUserInterface, self).__init__(filelist_dir)
+        self.base_input_dir = base_input_dir
+
+    def filelist(self, container):
+        container = container.strip('/')
+        filelist_path = os.path.join(self.filelist_dir, container+'.txt')
+        if not os.path.exists(filelist_path):
+            raise IOError("Missing filelist from EosUserInterface(%s) for %s" % (self.filelist_dir, container)
+                          +"\nProbably need to call SampleCatalogue.add_filelists()")
+        return filelist_path
+
+    def generate_filelist(self, container):
+        raise NotImplementedError("Need some cleanup of the output? 
and perhaps pre-pend 'root://eosatlas//eo'") + # todo include lines below + container = container.strip('/') + cmd = "eos ls %s/%s" % (self.base_input_dir, container) + utils.get_command_output(cmd) + filelist_path = os.path.join(self.filelist_dir, container+'.txt') + with open(filelist_path, 'w') as filelist_file: + filenames = [f for f in os.listdir(os.path.join(self.base_input_dir, container)) if '.root' in f] + filelist_file.write('\n'.join(filenames)) + return filelist_path + +#___________________________________________________________ + +class RseInterface(InputDataInterface): + """Interface to a remote storage element accessed through rucio. + When the filelist if missing, raise IOError. + Prefer not to generate them under the hood (can take time, better + if user does it explicitly). + + Users should usually instantiate objects of site-specific classes + (see RucioEosCernInterface and RucioPnfsIfaeInterface) + """ + def __init__(self, filelist_dir, rse, root_prefix, root_prefix_placeholder): + """ + Example arguments: + - rse : CERN-PROD_SCRATCHDISK + - root_prefix : root://eosatlas//eos + - root_prefix_placeholder : eos + See generate_filelist.clean_line for details + """ + super(RseInterface, self).__init__(filelist_dir) + self.rse = rse + self.root_prefix = root_prefix + self.root_prefix_placeholder = root_prefix_placeholder + + def filelist(self, container): + filelist_path = os.path.join(self.filelist_dir, container.strip('/')+'.txt') + if not os.path.exists(filelist_path): + raise IOError("Missing filelist from RseUserInterface(%s) for %s" % (self.filelist_dir, container) + +"Probably need to call SampleCatalogue.add_filelists()") + return filelist_path + + def generate_filelist(self, container, overwrite_filelist=False): + container = container.strip('/') + has_rucio = any('RUCIO' in k for k in os.environ.keys()) + has_voms = any('VOMS' in k for k in os.environ.keys()) # does not take into account expired token + if not has_rucio or not has_voms: + raise EnvironmentError("Invalid environment: please 'lsetup rucio' and 'voms-proxy-init -voms atlas'") + filelist_path = os.path.join(self.filelist_dir, container+'.txt') + if os.path.exists(filelist_path): + if not overwrite_filelist: + return filelist_path + cmd = "rucio list-file-replicas --rse {rse:s} {container:s} | grep {rse:s}".format(**{'rse':self.rse, + 'container':container }) + def clean_line(line, rse, prefix_from, prefix_to): + """ + Convert output line that looks like: + | user.prose | user.prose.8949257._000001.out.root | 3.4 GB | 22acf9ae | CERN-PROD_SCRATCHDISK: gsiftp://eosatlassftp.cern.ch:2811/eos/atlas/atlasscratchdisk/rucio/user/prose/6d/a7/user.prose.8949257._000001.out.root | + into a line that looks like: + root://eosatlas//eos/atlas/atlasscratchdisk/rucio/user/prose/6d/a7/user.prose.8949257._000001.out.root + """ + fields = [f.strip() for f in line.split('|')] + file_column = next((f for f in fields if rse in f), None) + file_path = next((f.strip() for f in file_column.split() if prefix_from in f), None) + return re.sub(r'.*'+prefix_from, prefix_to, file_path, 1) + out = utils.get_command_output(cmd) + if out['returncode']: + raise IOError("Command failed: '%s'\nstdout:\n%s\nstderr:\n%s" % + (cmd, out['stdout'], out['stderr'])) + else: + lines = [l for l in out['stdout'].split('\n') if self.rse in l] + filenames = [clean_line(l, self.rse, self.root_prefix_placeholder, self.root_prefix) + for l in lines] + with open(filelist_path, 'w') as filelist_file: + filelist_file.write('\n'.join(filenames)) + return 
filelist_path +#___________________________________________________________ + +class RucioEosCernInterface(RseInterface): + "Access files on CERN-PROD_SCRATCHDISK through eos" + def __init__(self, filelist_dir='VLQAnalysis/data/filelist/eos', + rse='CERN-PROD_SCRATCHDISK', + root_prefix='root://eosatlas//eos/atlas', + root_prefix_placeholder='/eos/atlas'): + super(RucioEosCernInterface, self).__init__(filelist_dir, rse, root_prefix, root_prefix_placeholder) + +#___________________________________________________________ + +class RucioPnfsIfaeInterface(RseInterface): + "Access files on IFAE_SCRATCHDISK through pnfs" + def __init__(self, filelist_dir='VLQAnalysis/data/filelist/pnfs', + rse='IFAE_SCRATCHDISK', + root_prefix='root://xrootd.pic.es//pnfs/pic.es', + root_prefix_placeholder='/pnfs/pic.es'): + super(RucioPnfsIfaeInterface, self).__init__(filelist_dir, rse, root_prefix, root_prefix_placeholder) + +class At3ScratchDiskInterface(LocalDiskInterface): + """Data downloaded to the scratch2 disk on at3. + Currently 00-10 production. + """ + def __init__(self, + filelist_dir='VLQAnalysis/data/hbsm/filelist/at3pnfs', + base_input_dir='/nfs/at3/scratch2/lvalery/VLQFiles/AT-00-00-10/'): + super(At3ScratchDiskInterface, self).__init__(filelist_dir, base_input_dir) + + +#___________________________________________________________ + if __name__=='__main__': print "Testing sample catalogues" @@ -517,19 +692,39 @@ if __name__=='__main__': # print '\n'.join(s.full_name for s in uncategorised_samples) # -- tested: ok (go from one list to group files and back) - # groupfiles_directory = 'VLQAnalysis/data/hbsm/groups' + # groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' # sc.write_group_files(output_directory=groupfiles_directory) # sc2 = SampleCatalogue() # sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt')) # print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) # -- tested: ok (also the ttbar ht + hf splitting) - groupfiles_directory = 'VLQAnalysis/data/hbsm/groups' + # groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' + # sc_hbsm = HbsmSampleCatalogue() + # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) + # print "%d samples from group files" % (len(sc_hbsm.samples)) + # sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) + # print "%d samples after syst variations" % (len(sc_hbsm.samples)) + # ttbar_samples = sc_hbsm.samples_from_group('ttbar') + # for s in ttbar_samples: + # print s.short_name,' ',s.full_name + + # -- tested: ok for both eos and disk sc_hbsm = HbsmSampleCatalogue() - sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm/groups/*.txt')) - print "%d samples from group files" % (len(sc_hbsm.samples)) + sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) + # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm_test/hbsm.txt')) # test just on sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) - print "%d samples after syst variations" % (len(sc_hbsm.samples)) - ttbar_samples = sc_hbsm.samples_from_group('ttbar') - for s in ttbar_samples: - print s.short_name,' ',s.full_name + input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', + base_input_dir='/tmp/gerbaudo/rucio') + input_from_eos = RucioEosCernInterface() + def print_filelists(samples): + for sample in samples: + for systematic in sample.systematic_uncertainties: + for variation 
in [v for v in systematic.variations if v.name=='nominal']: + print "%s %s : %s" % (variation.name, sample.full_name, variation.filelist) + try: + print_filelists(sc_hbsm.samples) + except IOError: + print "Missing filelists, generating them" + sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) + print_filelists(sc_hbsm.samples) -- GitLab From 08b100cbe872d437eb8671fa3deb79e0a7163d90 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 20 Jul 2016 16:47:02 +0200 Subject: [PATCH 06/35] move data/hbsm/groups/ -> data/groups/hbsm/ Idea of overall organization: data/<what> (generic files, like filelists) data/<what>/{hbsm,vlq} (analysis-specific files, like groups) We need to decide whether the files should be in git or not (for example the groups files can easily be re-generated since their logic is implemented in the SampleCatalogue). Also, it might be excessive to store all of the filelists: > 1.8M data/filelist/eos/ --- data/groups/hbsm/.gitignore | 1 + data/hbsm/groups/.gitignore | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 data/groups/hbsm/.gitignore delete mode 100644 data/hbsm/groups/.gitignore diff --git a/data/groups/hbsm/.gitignore b/data/groups/hbsm/.gitignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/data/groups/hbsm/.gitignore @@ -0,0 +1 @@ +* diff --git a/data/hbsm/groups/.gitignore b/data/hbsm/groups/.gitignore deleted file mode 100644 index 2211df6..0000000 --- a/data/hbsm/groups/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.txt -- GitLab From 86aa3700dcea7b7aaaca41c23c9e467c8612acbd Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Thu, 21 Jul 2016 18:06:16 +0200 Subject: [PATCH 07/35] working version of batch utils; now testing --- python/batch_utils.py | 350 ++++++++++++++++++++++++++++++++++++ python/sample_catalogues.py | 3 +- 2 files changed, 352 insertions(+), 1 deletion(-) create mode 100644 python/batch_utils.py diff --git a/python/batch_utils.py b/python/batch_utils.py new file mode 100644 index 0000000..b33e959 --- /dev/null +++ b/python/batch_utils.py @@ -0,0 +1,350 @@ +""" +This module provides utilities to interact with batch systems (lxplus or at3) + +Overall design: + +JobManager creates Jobs +Jobs can be generated/submitted/checked/resubmitted + +Each group of jobs has a label; all relevant files go in the directories below: + +batch/ +`--- <label>/ + |--- input/ + |--- log/ + |--- output/ + `--- status/ + +TODO: to avoid having too many files in one place, think about having + subdirectories under output (for example by group + merged) + +davide.gerbaudo@gmail.com +Jul 2016 +""" + +# import copy +import glob +import os +# import re +# import collections + +from VLQAnalysis import utils +from VLQAnalysis.sample_catalogues import HbsmSampleCatalogue, LocalDiskInterface, RucioEosCernInterface + +#___________________________________________________________ + + +def base_directory(): + """The base directory is the one above VLQAnalysis and RootCoreBin + + All relatives paths are relative to base_directory or to the + working directory on the batch node. 
+ """ + python_dir = os.path.dirname(os.path.abspath(__file__)) + up_two = (os.pardir, os.pardir) + return os.path.normpath(os.path.abspath(os.path.join(python_dir, *up_two))) + +def vlq_directory(): + return base_directory()+'/VLQAnalysis' + +def prepare_batch_files(opts=None, template_filename=None, samples=[]): + batch_dir = utils.mkdir_if_needed(relative_batch_directory(opts)) + verbose = opts.verbose + absolute_output_base_dir = base_directory() + tar_file = opts.tarfile if opts.tarfile else "%s/%s/packages.tgz" % (base_directory(), batch_dir) + tar_file = os.path.abspath(tar_file) + if not os.path.exists(tar_file): + prepare_tar_file(tar_file_name=tar_file, verbose=opts.verbose) + # TODO : overwrite option + # TODO : for lxbatch might need additional job option parameters + + batch_filenames = [] + if opts.debug: + print "filling template %s" % (template_filename) + template_contents = open(template_filename).read() + for sample in samples: + sample_name = sample.short_name + # sample_name = 'hbsm' + print('fixme hack around sample_name') + if opts.syst=='all': + sample.use_all_uncertainties() + elif opts.syst=='object': + sample.use_object_uncertainties() + elif opts.syst=='weight': + sample.use_weight_uncertainties() + else: + sample.use_nominal_uncertainty() + systematics = sample.systematic_uncertainties + systematics = (utils.filter_with_regexp(systematics, opts.syst_include, func=lambda x: x.name) if opts.syst_include else + systematics) + systematics = (utils.exclude_with_regexp(systematics, opts.syst_exclude, func=lambda x: x.name) if opts.syst_exclude else + systematics) + # should we also filter with the same regex on variation.input_tree? it might be a useful feature + for systematic in systematics: + is_nominal = systematic.is_nominal + for variation in systematic.variations: + job_label = vlq_job_label(sample_name, variation.name) + parameters = {'sample_name' : sample_name, + 'tar_file' : tar_file, + 'relative_output_dir' : relative_output_directory(opts), + 'absolute_output_base_dir' : absolute_output_base_dir, + 'filelist_name' : sample.filelist_file, + 'input_tree' : variation.input_tree, + 'output_file' : (sample_name+'.root' if is_nominal else + "%s_%s.root" % (sample_name, variation.name)), + 'dsid' : sample.dsid, + 'compute_weight_sys' : ('true' if is_nominal else 'false'), + 'job_label' : job_label, + } + batch_filename = batch_dir+'/'+sample_name+'_'+variation.name+'.sh' + if opts.debug: + print "generating %s" % (batch_filename) + batch_file = open(batch_filename, 'w') + batch_file.write(template_contents.format(**parameters)) + batch_file.close() + os.chmod(batch_filename, 0755) + if verbose: + print "created batch file %s" % batch_filename + batch_filenames.append(batch_filename) + return batch_filenames + +def submit_job(batch_file=None, opts=None, lxbatch=False, at3=False): + queue = opts.queue + verbose = opts.verbose + base_dir = base_directory() + short_batch_file = os.path.splitext(os.path.basename(batch_file))[0] # w/out ext, =sample name + + cmd = '' + if opts.lxbatch: + cmd = (" bsub " + +" -L /bin/bash " # reset shell + +" -q %s " % queue + # not sure this is working + # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file) + +" -J %s " % short_batch_file + +" -o %s.oe " % (relative_output_directory(opts=opts)+'/'+short_batch_file) + +" %s" % os.path.join(base_dir, batch_file) + ) + else: + cmd = (" qsub " + +" -j oe " # join stdout and stderr + +" -o %s/%s/%s.oe" % (base_dir, relative_output_directory(opts), 
short_batch_file) + +" -q %s " % queue + +" %s" % batch_file + ) + if verbose: + print cmd + if opts.submit: + out = utils.get_command_output(cmd) + if verbose: + print out['stdout'] + print out['stderr'] + +def prepare_tar_file(tar_file_name=None, verbose=None): + cmd = "tar czf %s BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis" % tar_file_name + cmd = cmd+" --exclude='.svn' --exclude='*.o' --exclude='*.so'" + if not os.path.exists(tar_file_name): + raise RuntimeError("Missing tar file; please create it with:\n\n"+cmd) + +#___________________________________________________________ + +class Job(object): + "TODO add description" + def __init__(self, script_path): + self.script_path = script_path + @property + def short_batch_file(self): + return utils.filename_without_extension(self.script_path) # w/out ext, =sample name + + +#___________________________________________________________ + +class JobManager(object): + "Manage a set of jobs; all inputs/outputs will be under batch/<jobset_label>" + def __init__(self, jobset_label, verbose=False, debug=False): + self.jobset = jobset_label + self.queues = [] + self._queue = None + self.absolute_output_base_dir = base_directory() + self.dry_run = True + self.jobs = [] + self._tar_file = None + self.template_path = None + self._template_contents = None # cached + self.verbose = verbose + self.debug = debug + self.create_directories() + + + def create_job(self, sample, systematic, variation, template_path=None): + "This will need access to several specialised attributes (template, dirs, etc.)" + raise NotImplementedError("create_job should be implemented in each specialised JobManager class") + + def generic_create_job(self, sample, systematic, variation, template_path=None): + """create the script and append Job to self.jobs template_path + should be used only for special samples using non-default + template + """ + template_path = template_path if template_path else self.template_path + template_contents = (self.template_contents if template_path==self.template_path # use cache if default + else open(self.template_path).read()) # otherwise read it + sample_name = sample.full_name.strip('/') + # sample_name = sample.short_name # maybe this should be configurable? does the plotting/fitting code depend on it? 
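+        # The template is plain text expanded with str.format; a minimal
+        # (hypothetical) template line would look like:
+        #   ./run --files {filelist_name} --tree {input_tree} --out {output_file} --dsid {dsid}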
+ is_nominal = systematic.is_nominal + job_label = self.job_label(sample_name=sample_name, variation_name=variation.name) + parameters = {'sample_name' : sample_name, + 'tar_file' : self.tar_file, + 'relative_output_dir' : self.relative_output_directory, + 'absolute_output_base_dir' : self.absolute_output_base_dir, + 'filelist_name' : variation.filelist, + 'input_tree' : variation.input_tree, + 'output_file' : (sample_name+'.root' if is_nominal else + "%s_%s.root" % (sample_name, variation.name)), + 'dsid' : sample.dsid, + 'compute_weight_sys' : ('true' if is_nominal else 'false'), + 'job_label' : job_label, + } + batch_filename = self.relative_input_directory+'/'+sample_name+'_'+variation.name+'.sh' + if self.debug: + print "generating %s" % (batch_filename) + batch_file = open(batch_filename, 'w') + batch_file.write(template_contents.format(**parameters)) + batch_file.close() + os.chmod(batch_filename, 0755) + if self.verbose: + print "created batch file %s" % batch_filename + self.jobs.append(Job(batch_filename)) + + def submit_jobs(self): + for job in self.jobs: + cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, + base_dir=self.absolute_output_base_dir, job=job) + if self.verbose: + print cmd + if not self.dry_run: + out = utils.get_command_output(cmd) + status_path = os.path.join(self.relative_status_directory, job.short_name+'.submitted') + with open(status_path, 'w') as status_file: + status_file.write('stdout:\n'+out['stdout']+ + 'stderr:\n'+ out['stderr']) + def check_job(self, sample): + raise NotImplementedError("todo") + @property + def relative_input_directory(self): + "where the job script files will be generated" + return 'batch/'+self.jobset+'/input' + @property + def relative_log_directory(self): + return 'batch/'+self.jobset+'/log' + @property + def relative_output_directory(self): + return 'batch/'+self.jobset+'/output' + @property + def relative_status_directory(self): + return 'batch/'+self.jobset+'/status' + def create_directories(self): + for d in [self.relative_input_directory, self.relative_log_directory, + self.relative_output_directory, self.relative_status_directory]: + dir_path = utils.mkdir_if_needed(d) + if self.verbose: + print "created %s" % dir_path + def job_label(self, sample_name=None, variation_name=None): + "The label used to distinguish one job from another." 
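+        # e.g. sample_name='ttbar', variation_name='nominal' -> 'ttbar_nominal'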
+        job_label = sample_name+'_'+variation_name
+        return job_label
+    def default_tar_file(self):
+        return base_directory()+'/'+self.relative_input_directory+'/packages.tgz'
+    def check_tar_file(self, path):
+        if not os.path.exists(path):
+            cmd = "tar czf %s " % path
+            cmd += " BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis"
+            cmd += " --exclude='.svn' --exclude='*.o' --exclude='*.so'"
+            raise RuntimeError("Missing tar file; please create it with:\n\n"+cmd)
+    @property
+    def tar_file(self):
+        if not self._tar_file: # set value on first call (and check existence if necessary)
+            self._tar_file = self.default_tar_file()
+            self.check_tar_file(self._tar_file)
+        return self._tar_file
+    @tar_file.setter
+    def tar_file(self, value):
+        value = os.path.abspath(value)
+        self.check_tar_file(value)
+        self._tar_file = value
+    def default_queue(self):
+        return self.queues[0]
+    @property
+    def queue(self):
+        if not self._queue: # set value on first call (and check existence if necessary)
+            self._queue = self.default_queue()
+        return self._queue
+    @queue.setter
+    def queue(self, value):
+        if value not in self.queues:
+            raise ValueError("invalid queue '%s', must be from %s" % (value, str(self.queues)))
+        self._queue = value
+    @property
+    def template_contents(self):
+        if not self._template_contents:
+            self._template_contents = open(self.template_path).read()
+        return self._template_contents
+
+
+
+#___________________________________________________________
+
+class LxbJobManager(JobManager):
+    "Job manager for lxbatch queues at cern"
+    def __init__(self, jobset_label):
+        super(LxbJobManager, self).__init__(jobset_label)
+        self.queues = ['8nm', '1nh', '8nh', '1nd', '2nd', '1nw', '2nw', 'test'] # bqueues -u ${USER}
+        self.template_path = 'VLQAnalysis/data/hbsm/batch/templates/cern/one_lep_nom.sh'
+    def job_submission_command(self, queue=None, verbose=None, base_dir=None, job=None):
+        short_batch_file = job.short_batch_file
+        cmd = (" bsub "
+               +" -L /bin/bash " # reset shell
+               +" -q %s " % queue
+               # not sure this is working
+               # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file)
+               +" -J %s " % short_batch_file
+               +" -o %s.oe " % (self.relative_output_directory+'/'+short_batch_file)
+               +" %s" % os.path.join(base_dir, job.script_path)
+               )
+        return cmd
+    def create_job(self, sample, systematic, variation, template_path=None):
+        self.generic_create_job(sample, systematic, variation, template_path)
+
+
+
+#___________________________________________________________
+
+class At3JobManager(JobManager):
+    "Job manager for at3 queues at pic"
+    def __init__(self, jobset_label):
+        super(At3JobManager, self).__init__(jobset_label)
+        self.queues = ['at3_short', 'at3', 'at3_8h', 'at3_xxl']
+        self.template_path = 'VLQAnalysis/data/hbsm/batch/templates/ifae/one_lep_nom.sh'
+    def create_job(self, sample, systematic, variation, template_path=None):
+        self.generic_create_job(sample, systematic, variation, template_path)
+
+
+#___________________________________________________________
+
+if __name__=='__main__':
+    print "Testing job manager"
+
+    sc_hbsm = HbsmSampleCatalogue()
+    # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt'))
+    sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm_test/hbsm.txt')) # test with just one group
+    sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples)
+    input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist',
+                                        base_input_dir='/tmp/gerbaudo/rucio')
+    input_from_eos = RucioEosCernInterface()
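+    # Note: JobManager.dry_run defaults to True, so submit_jobs() below only
+    # prepares the commands; a sketch of a real submission:
+    #   job_manager.dry_run = False
+    #   job_manager.queue = '8nh' # any entry of LxbJobManager.queues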
+ # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) + sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir) + + job_manager = LxbJobManager('test_2016-07-20') + for sample in sc_hbsm.samples: + for systematic in sample.systematic_uncertainties: + for variation in [v for v in systematic.variations if v.name=='nominal']: + job_manager.create_job(sample, systematic, variation) + job_manager.submit_jobs() diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py index e3f39c0..893f5e5 100644 --- a/python/sample_catalogues.py +++ b/python/sample_catalogues.py @@ -173,7 +173,8 @@ class SampleCatalogue(object): def add_filelists(self, samples=None, input_interface=None): "Attach a filelist to each one of the input samples x variations" samples = self.samples if not samples else samples - print "About the create filelists for %d samples; this might take some time." % len(samples) + if self.verbose: + print "About to check/create filelists for %d samples; this might take some time." % len(samples) for sample in samples: for uncertainty in sample.systematic_uncertainties: for variation in uncertainty.variations: -- GitLab From 396f0390dc99a968a919f16328426b60f55faddb Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Thu, 21 Jul 2016 21:52:36 +0200 Subject: [PATCH 08/35] batch_utils: fix a few minor bugs Submit also when 'not verbose'; remove unused code --- python/batch_utils.py | 121 +++++------------------------------------- 1 file changed, 13 insertions(+), 108 deletions(-) diff --git a/python/batch_utils.py b/python/batch_utils.py index b33e959..2ac5033 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -47,105 +47,6 @@ def base_directory(): def vlq_directory(): return base_directory()+'/VLQAnalysis' -def prepare_batch_files(opts=None, template_filename=None, samples=[]): - batch_dir = utils.mkdir_if_needed(relative_batch_directory(opts)) - verbose = opts.verbose - absolute_output_base_dir = base_directory() - tar_file = opts.tarfile if opts.tarfile else "%s/%s/packages.tgz" % (base_directory(), batch_dir) - tar_file = os.path.abspath(tar_file) - if not os.path.exists(tar_file): - prepare_tar_file(tar_file_name=tar_file, verbose=opts.verbose) - # TODO : overwrite option - # TODO : for lxbatch might need additional job option parameters - - batch_filenames = [] - if opts.debug: - print "filling template %s" % (template_filename) - template_contents = open(template_filename).read() - for sample in samples: - sample_name = sample.short_name - # sample_name = 'hbsm' - print('fixme hack around sample_name') - if opts.syst=='all': - sample.use_all_uncertainties() - elif opts.syst=='object': - sample.use_object_uncertainties() - elif opts.syst=='weight': - sample.use_weight_uncertainties() - else: - sample.use_nominal_uncertainty() - systematics = sample.systematic_uncertainties - systematics = (utils.filter_with_regexp(systematics, opts.syst_include, func=lambda x: x.name) if opts.syst_include else - systematics) - systematics = (utils.exclude_with_regexp(systematics, opts.syst_exclude, func=lambda x: x.name) if opts.syst_exclude else - systematics) - # should we also filter with the same regex on variation.input_tree? 
it might be a useful feature - for systematic in systematics: - is_nominal = systematic.is_nominal - for variation in systematic.variations: - job_label = vlq_job_label(sample_name, variation.name) - parameters = {'sample_name' : sample_name, - 'tar_file' : tar_file, - 'relative_output_dir' : relative_output_directory(opts), - 'absolute_output_base_dir' : absolute_output_base_dir, - 'filelist_name' : sample.filelist_file, - 'input_tree' : variation.input_tree, - 'output_file' : (sample_name+'.root' if is_nominal else - "%s_%s.root" % (sample_name, variation.name)), - 'dsid' : sample.dsid, - 'compute_weight_sys' : ('true' if is_nominal else 'false'), - 'job_label' : job_label, - } - batch_filename = batch_dir+'/'+sample_name+'_'+variation.name+'.sh' - if opts.debug: - print "generating %s" % (batch_filename) - batch_file = open(batch_filename, 'w') - batch_file.write(template_contents.format(**parameters)) - batch_file.close() - os.chmod(batch_filename, 0755) - if verbose: - print "created batch file %s" % batch_filename - batch_filenames.append(batch_filename) - return batch_filenames - -def submit_job(batch_file=None, opts=None, lxbatch=False, at3=False): - queue = opts.queue - verbose = opts.verbose - base_dir = base_directory() - short_batch_file = os.path.splitext(os.path.basename(batch_file))[0] # w/out ext, =sample name - - cmd = '' - if opts.lxbatch: - cmd = (" bsub " - +" -L /bin/bash " # reset shell - +" -q %s " % queue - # not sure this is working - # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file) - +" -J %s " % short_batch_file - +" -o %s.oe " % (relative_output_directory(opts=opts)+'/'+short_batch_file) - +" %s" % os.path.join(base_dir, batch_file) - ) - else: - cmd = (" qsub " - +" -j oe " # join stdout and stderr - +" -o %s/%s/%s.oe" % (base_dir, relative_output_directory(opts), short_batch_file) - +" -q %s " % queue - +" %s" % batch_file - ) - if verbose: - print cmd - if opts.submit: - out = utils.get_command_output(cmd) - if verbose: - print out['stdout'] - print out['stderr'] - -def prepare_tar_file(tar_file_name=None, verbose=None): - cmd = "tar czf %s BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis" % tar_file_name - cmd = cmd+" --exclude='.svn' --exclude='*.o' --exclude='*.so'" - if not os.path.exists(tar_file_name): - raise RuntimeError("Missing tar file; please create it with:\n\n"+cmd) - #___________________________________________________________ class Job(object): @@ -154,6 +55,7 @@ class Job(object): self.script_path = script_path @property def short_batch_file(self): + "use to name the status and log files" return utils.filename_without_extension(self.script_path) # w/out ext, =sample name @@ -220,13 +122,13 @@ class JobManager(object): cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, base_dir=self.absolute_output_base_dir, job=job) if self.verbose: - print cmd - if not self.dry_run: - out = utils.get_command_output(cmd) - status_path = os.path.join(self.relative_status_directory, job.short_name+'.submitted') - with open(status_path, 'w') as status_file: - status_file.write('stdout:\n'+out['stdout']+ - 'stderr:\n'+ out['stderr']) + print 'cmd: ',cmd + if not self.dry_run: + out = utils.get_command_output(cmd) + status_path = os.path.join(self.relative_status_directory, job.short_batch_file+'.submitted') + with open(status_path, 'w') as status_file: + status_file.write('stdout:\n'+out['stdout']+ + 'stderr:\n'+ out['stderr']) def check_job(self, sample): raise 
NotImplementedError("todo")
     @property
@@ -310,6 +212,7 @@ class LxbJobManager(JobManager):
                +" -o %s.oe " % (self.relative_output_directory+'/'+short_batch_file)
                +" %s" % os.path.join(base_dir, job.script_path)
                )
+        return cmd
     def create_job(self, sample, systematic, variation, template_path=None):
         self.generic_create_job(sample, systematic, variation, template_path)
@@ -339,10 +242,12 @@ if __name__=='__main__':
     input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist',
                                         base_input_dir='/tmp/gerbaudo/rucio')
     input_from_eos = RucioEosCernInterface()
-    # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos)
-    sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir)
+    sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos)
+    # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir)
 
     job_manager = LxbJobManager('test_2016-07-20')
+    job_manager.verbose = True
+    job_manager.dry_run = False
     for sample in sc_hbsm.samples:
         for systematic in sample.systematic_uncertainties:
             for variation in [v for v in systematic.variations if v.name=='nominal']:
-- 
GitLab


From 83b1a6aece6f5970bb457d925b3e24de283dea1f Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Thu, 21 Jul 2016 22:08:58 +0200
Subject: [PATCH 09/35] updated list of samples

00-11 includes more hbsm samples and hplus. I don't know why 410001 is
removed.
---
 data/samples_HtX4TopsNtuple-00-00-11.txt | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/data/samples_HtX4TopsNtuple-00-00-11.txt b/data/samples_HtX4TopsNtuple-00-00-11.txt
index 7c72588..48afabd 100644
--- a/data/samples_HtX4TopsNtuple-00-00-11.txt
+++ b/data/samples_HtX4TopsNtuple-00-00-11.txt
@@ -1,5 +1,4 @@
 user.prose.410000.PowhegPythiaEvtGen.DAOD_TOPQ1.e3698_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
-user.prose.410001.PowhegPythiaEvtGen.DAOD_TOPQ1.e3783_s2608_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.410002.PowhegPythiaEvtGen.DAOD_TOPQ1.e3783_s2608_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.410003.aMcAtNloHerwigppEvtGen.DAOD_TOPQ1.e4441_s2726_r7772_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.410120.PowhegPythiaEvtGen.DAOD_TOPQ1.e4373_s2608_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
@@ -51,6 +50,7 @@
 user.prose.363457.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.363460.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.363463.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.363466.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.363469.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.363472.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.363475.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.363478.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-11_out.root/
@@ -617,3 +617,17 @@
 user.prose.341551.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.341552.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.341554.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
 user.prose.341555.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.341556.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.341557.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.341558.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344066.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_s2726_r7772_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344074.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_s2726_r7772_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344066.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344068.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344072.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344073.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344074.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344076.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344079.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344080.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
+user.prose.344081.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-11_out.root/
-- 
GitLab


From ee2fedefa99c3791080935dc7243683dd4cea9cf Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Mon, 1 Aug 2016 18:46:56 +0200
Subject: [PATCH 10/35] improvement: check rucio only when needed

---
 python/sample_catalogues.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 893f5e5..9d4789f 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -603,14 +603,15 @@ class RseInterface(InputDataInterface):
 
     def generate_filelist(self, container, overwrite_filelist=False):
         container = container.strip('/')
-        has_rucio = any('RUCIO' in k for k in os.environ.keys())
-        has_voms = any('VOMS' in k for k in os.environ.keys()) # does not take into account expired token
-        if not has_rucio or not has_voms:
-            raise EnvironmentError("Invalid environment: please 'lsetup rucio' and 'voms-proxy-init -voms atlas'")
         filelist_path = os.path.join(self.filelist_dir, container+'.txt')
         if os.path.exists(filelist_path):
             if not overwrite_filelist:
                 return filelist_path
+        has_rucio = any('RUCIO' in k for k in os.environ.keys())
+        has_voms = any('VOMS' in k for k in os.environ.keys()) # does not take into account expired token
+        if not has_rucio or not has_voms:
+            raise EnvironmentError("Invalid environment: please 'lsetup rucio' and 'voms-proxy-init -voms atlas'")
+
         cmd = "rucio list-file-replicas --rse {rse:s} {container:s} | grep {rse:s}".format(**{'rse':self.rse,
                                                                                              'container':container })
     def clean_line(line, rse, prefix_from, prefix_to):
-- 
GitLab


From 0f9e99d0773436c8ffc179c40d67615554e1ca8f Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Tue, 2 Aug 2016 11:52:57 +0200
Subject: [PATCH 11/35] move to prod 00-12; allow overwriting group, add
 merge_regions.py

Details:
- prod 00-12: update template and add new lists
- allow overwriting groups with option
- merge_regions.py : written for Nicola to
run the fit on regions with low-stats --- data/hbsm/batch/templates/cern/one_lep_nom.sh | 2 +- data/samples_HtX4TopsNtuple-00-00-12.txt | 543 ++++++++++++++++++ python/batch_utils.py | 2 +- python/sample_catalogues.py | 53 +- 4 files changed, 574 insertions(+), 26 deletions(-) create mode 100644 data/samples_HtX4TopsNtuple-00-00-12.txt diff --git a/data/hbsm/batch/templates/cern/one_lep_nom.sh b/data/hbsm/batch/templates/cern/one_lep_nom.sh index 2faa815..c024d36 100644 --- a/data/hbsm/batch/templates/cern/one_lep_nom.sh +++ b/data/hbsm/batch/templates/cern/one_lep_nom.sh @@ -54,7 +54,7 @@ function main() {{ echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1 # rcSetup Base,2.3.50 >> ${{subtask_log_file}} 2>&1 lsetup 'rcSetup -u' >> ${{subtask_log_file}} 2>&1 - lsetup 'rcsetup Base,2.3.50' >> ${{subtask_log_file}} 2>&1 + lsetup 'rcsetup Base,2.4.14' >> ${{subtask_log_file}} 2>&1 rc find_packages >> ${{subtask_log_file}} 2>&1 rc clean >> ${{subtask_log_file}} 2>&1 rc compile >> ${{subtask_log_file}} 2>&1 diff --git a/data/samples_HtX4TopsNtuple-00-00-12.txt b/data/samples_HtX4TopsNtuple-00-00-12.txt new file mode 100644 index 0000000..8a255b0 --- /dev/null +++ b/data/samples_HtX4TopsNtuple-00-00-12.txt @@ -0,0 +1,543 @@ +user.mcasolin.00276262.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276329.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276336.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276416.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276511.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276689.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276778.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276790.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276952.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00276954.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00278880.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00278912.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00278968.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279169.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279259.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279279.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279284.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279345.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279515.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279598.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279685.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279764.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279813.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279867.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.00279928.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279932.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00279984.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280231.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280319.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280368.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280423.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280464.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280500.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280520.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280614.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280673.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280753.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280853.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280862.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280950.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00280977.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00281070.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00281074.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00281075.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00281317.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00281411.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00282625.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00282631.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00282712.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00282784.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00282992.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00283074.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00283155.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00283270.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00283429.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00283608.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00283780.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00284006.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00284154.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00284213.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00284285.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.00284420.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00284427.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00284484.physics_Main.DAOD_TOPQ4.r7562_p2521_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00297730.physics_Main.DAOD_TOPQ4.f694_m1583_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298595.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298609.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298633.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298687.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298690.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298771.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298773.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298862.physics_Main.DAOD_TOPQ4.f696_m1588_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00298967.physics_Main.DAOD_TOPQ4.f696_m1588_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00299055.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00299144.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00299147.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00299184.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00299243.physics_Main.DAOD_TOPQ4.f698_m1594_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00299584.physics_Main.DAOD_TOPQ4.f703_m1600_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300279.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300345.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300415.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300418.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300487.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300540.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300571.physics_Main.DAOD_TOPQ4.f705_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300600.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300655.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300687.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300784.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300800.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300863.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00300908.physics_Main.DAOD_TOPQ4.f708_m1606_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00301912.physics_Main.DAOD_TOPQ4.f709_m1611_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00301918.physics_Main.DAOD_TOPQ4.f709_m1611_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00301932.physics_Main.DAOD_TOPQ4.f709_m1611_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00301973.physics_Main.DAOD_TOPQ4.f709_m1611_p2667.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302053.physics_Main.DAOD_TOPQ4.f709_m1611_p2689.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.00302137.physics_Main.DAOD_TOPQ4.f709_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302265.physics_Main.DAOD_TOPQ4.f709_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302269.physics_Main.DAOD_TOPQ4.f709_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302300.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302347.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302380.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302391.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302393.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302737.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302831.physics_Main.DAOD_TOPQ4.f711_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302872.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302919.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302925.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00302956.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303007.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303079.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303201.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303208.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303264.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303266.physics_Main.DAOD_TOPQ4.f715_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303291.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303304.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303338.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303421.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303499.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.00303560.physics_Main.DAOD_TOPQ4.f716_m1620_p2689.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302055.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4017_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302056.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4017_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302057.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4017_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302058.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4017_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302059.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4017_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302468.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302469.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302470.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302471.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.302472.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302473.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302474.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302475.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302476.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302477.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302478.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302479.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302480.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302481.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302482.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302483.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302484.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302485.ProtosLHEFPythia8EvtGen.DAOD_TOPQ1.e4112_s2608_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.302777.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4476_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341541.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341543.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341545.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341546.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341547.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341548.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341549.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341550.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341551.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341552.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341553.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341554.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341555.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341556.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.341557.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.343365.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4706_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.343366.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4706_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.343367.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4706_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344066.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344067.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344068.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344069.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344070.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344071.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344072.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344073.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5075_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344074.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344075.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344076.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344077.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344078.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344079.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344080.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.344081.MadGraphPythia8EvtGen.DAOD_TOPQ1.e5052_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361091.Sherpa.DAOD_TOPQ1.e4607_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361092.Sherpa.DAOD_TOPQ1.e4607_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361093.Sherpa.DAOD_TOPQ1.e4607_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361094.Sherpa.DAOD_TOPQ1.e4607_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361095.Sherpa.DAOD_TOPQ1.e4607_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361096.Sherpa.DAOD_TOPQ1.e4607_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361097.Sherpa.DAOD_TOPQ1.e4607_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361300.Sherpa.DAOD_TOPQ1.e3651_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361301.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361302.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361303.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361304.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361305.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361306.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361307.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.361308.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361309.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361310.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361311.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361312.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361313.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361314.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361315.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361316.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361317.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361318.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361319.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361320.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361321.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361322.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361323.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361324.Sherpa.DAOD_TOPQ1.e3651_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361325.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361326.Sherpa.DAOD_TOPQ1.e3651_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361327.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361328.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361329.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361330.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361331.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361332.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361333.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361334.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361335.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361336.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361337.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361338.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361339.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361340.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361341.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.361342.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361343.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361344.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361345.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361346.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361347.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361348.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361349.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361350.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361351.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361352.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361353.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361354.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361355.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361356.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361357.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361358.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361359.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361360.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361361.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361362.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361363.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361364.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361365.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361366.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361367.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361368.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361369.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361370.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361371.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361372.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361373.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361374.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361375.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.361376.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361377.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361378.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361379.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361380.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361381.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361382.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361383.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361384.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361385.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361386.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361387.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361388.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361389.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361390.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361391.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361392.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361393.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361394.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361395.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361396.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361397.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361398.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361399.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361400.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361401.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361402.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361403.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361404.Sherpa.DAOD_TOPQ1.e3651_s2586_s2174_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361405.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361406.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361407.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361408.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361409.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.361410.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361411.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361412.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361413.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361414.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361415.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361416.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361417.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361418.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361419.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361420.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361421.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361422.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361423.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361424.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361425.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361426.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361427.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361428.Sherpa.DAOD_TOPQ1.e3733_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361429.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361430.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361431.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361432.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361433.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361434.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361435.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361436.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361437.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361438.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361439.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361440.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361441.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361442.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.361443.Sherpa.DAOD_TOPQ1.e4133_s2608_s2183_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.363102.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363103.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363104.Sherpa.DAOD_TOPQ1.e4792_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363105.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363106.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363107.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363108.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363109.Sherpa.DAOD_TOPQ1.e4792_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363110.Sherpa.DAOD_TOPQ1.e4792_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363111.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363112.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363113.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363114.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363115.Sherpa.DAOD_TOPQ1.e4792_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363116.Sherpa.DAOD_TOPQ1.e4742_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363117.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363118.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363119.Sherpa.DAOD_TOPQ1.e4666_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363120.Sherpa.DAOD_TOPQ1.e4690_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363121.Sherpa.DAOD_TOPQ1.e4690_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363331.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363332.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363333.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363334.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363335.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363336.Sherpa.DAOD_TOPQ1.e4779_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363337.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363338.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363339.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363340.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363341.Sherpa.DAOD_TOPQ1.e4779_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363342.Sherpa.DAOD_TOPQ1.e4779_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363343.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363344.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363345.Sherpa.DAOD_TOPQ1.e4779_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363346.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.363347.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363348.Sherpa.DAOD_TOPQ1.e4779_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363349.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363350.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363351.Sherpa.DAOD_TOPQ1.e4779_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363352.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363353.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363354.Sherpa.DAOD_TOPQ1.e4709_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363361.Sherpa.DAOD_TOPQ1.e4689_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363362.Sherpa.DAOD_TOPQ1.e4689_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363363.Sherpa.DAOD_TOPQ1.e4743_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363364.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363365.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363366.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363367.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363368.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363369.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363370.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363371.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363372.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363373.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363374.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363375.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363376.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363377.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363378.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363379.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363380.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363381.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363382.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363383.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363384.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363385.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363386.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363387.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363388.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.363389.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363390.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363391.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363392.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363393.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363394.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363395.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363396.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363397.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363398.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363399.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363400.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363401.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363402.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363403.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363404.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363405.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363406.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363407.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363408.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363409.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363410.Sherpa.DAOD_TOPQ1.e4716_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363411.Sherpa.DAOD_TOPQ1.e4772_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363412.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363413.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363414.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363415.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363416.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363417.Sherpa.DAOD_TOPQ1.e4772_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363418.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363419.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363420.Sherpa.DAOD_TOPQ1.e4772_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363421.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363422.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363423.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363424.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.363425.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363426.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363427.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363428.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363429.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363430.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363431.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363432.Sherpa.DAOD_TOPQ1.e4772_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363433.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363434.Sherpa.DAOD_TOPQ1.e4716_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363435.Sherpa.DAOD_TOPQ1.e4772_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363436.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363437.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363438.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363439.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363440.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363441.Sherpa.DAOD_TOPQ1.e4771_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363442.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363443.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363444.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363445.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363446.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363447.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363448.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363449.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363450.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363451.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363452.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363453.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363454.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363455.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363456.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363457.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363458.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363459.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363460.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.363461.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363462.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363463.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363464.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363465.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363466.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363467.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363468.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363469.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363470.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363471.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363472.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363473.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363474.Sherpa.DAOD_TOPQ1.e4771_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363475.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363476.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363477.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363478.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363479.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363480.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363481.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363482.Sherpa.DAOD_TOPQ1.e4715_s2726_r7772_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.363483.Sherpa.DAOD_TOPQ1.e4715_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410000.PowhegPythiaEvtGen.DAOD_TOPQ1.e3698_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410011.PowhegPythiaEvtGen.DAOD_TOPQ1.e3824_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410012.PowhegPythiaEvtGen.DAOD_TOPQ1.e3824_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410013.PowhegPythiaEvtGen.DAOD_TOPQ1.e3753_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410014.PowhegPythiaEvtGen.DAOD_TOPQ1.e3753_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410015.PowhegPythiaEvtGen.DAOD_TOPQ1.e3753_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410016.PowhegPythiaEvtGen.DAOD_TOPQ1.e3753_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410017.PowhegPythiaEvtGen.DAOD_TOPQ1.e3978_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410018.PowhegPythiaEvtGen.DAOD_TOPQ1.e3978_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410019.PowhegPythiaEvtGen.DAOD_TOPQ1.e3978_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410020.PowhegPythiaEvtGen.DAOD_TOPQ1.e3978_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ 
+user.mcasolin.410025.PowhegPythiaEvtGen.DAOD_TOPQ1.e3998_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410026.PowhegPythiaEvtGen.DAOD_TOPQ1.e3998_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410047.PowhegHerwigppEvtGen.DAOD_TOPQ1.e4775_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410048.PowhegHerwigppEvtGen.DAOD_TOPQ1.e4775_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410062.PowhegPythiaEvtGen.DAOD_TOPQ1.e4132_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410063.PowhegPythiaEvtGen.DAOD_TOPQ1.e4132_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410066.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4111_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410067.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4111_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410068.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4111_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410073.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4631_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410074.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4631_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410075.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4631_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410080.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4111_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410081.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4111_s2608_s2183_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410099.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410100.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410101.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410102.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410107.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410108.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410109.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410110.PowhegPythiaEvtGen.DAOD_TOPQ1.e4403_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410111.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4632_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410112.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4632_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410113.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4632_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410114.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4632_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410115.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4632_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410116.MadGraphPythia8EvtGen.DAOD_TOPQ1.e4632_s2726_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410120.PowhegPythiaEvtGen.DAOD_TOPQ1.e4373_s2608_r7725_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410147.PowhegHerwigppEvtGen.DAOD_TOPQ1.e4595_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ +user.mcasolin.410148.PowhegHerwigppEvtGen.DAOD_TOPQ1.e4595_a766_a818_r7676_p2669.HtX4Tops_00-00-12_out.root/ diff --git a/python/batch_utils.py b/python/batch_utils.py index 
2ac5033..9ad2ead 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -245,7 +245,7 @@ if __name__=='__main__': sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir) - job_manager = LxbJobManager('test_2016-07-20') + job_manager = LxbJobManager('test_2016-08-02') job_manager.verbose = True job_manager.dry_run = False for sample in sc_hbsm.samples: diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py index 9d4789f..2d0705c 100644 --- a/python/sample_catalogues.py +++ b/python/sample_catalogues.py @@ -77,10 +77,10 @@ class Sample(object): @group.setter def group(self, value, overwrite_group=False): "this attribute is set by SampleCatalogue so we need to check it and cache it" - if not sample._group or sample._group==value: - sample._group = group + if not self._group or self._group==value: + self._group = value elif overwrite_group: - sample._group = group + self._group = value else: pass @@ -695,8 +695,13 @@ if __name__=='__main__': # print '\n'.join(s.full_name for s in uncategorised_samples) # -- tested: ok (go from one list to group files and back) - # groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' - # sc.write_group_files(output_directory=groupfiles_directory) + groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' + sc = HbsmSampleCatalogue() + sc = VlqSampleCatalogue() + sc.verbose = True + sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt') + sc.categorise_samples(sc.samples) # only for specialised catalogues + sc.write_group_files(output_directory=groupfiles_directory) # sc2 = SampleCatalogue() # sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt')) # print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) @@ -712,22 +717,22 @@ if __name__=='__main__': # for s in ttbar_samples: # print s.short_name,' ',s.full_name - # -- tested: ok for both eos and disk - sc_hbsm = HbsmSampleCatalogue() - sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) - # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm_test/hbsm.txt')) # test just on - sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) - input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', - base_input_dir='/tmp/gerbaudo/rucio') - input_from_eos = RucioEosCernInterface() - def print_filelists(samples): - for sample in samples: - for systematic in sample.systematic_uncertainties: - for variation in [v for v in systematic.variations if v.name=='nominal']: - print "%s %s : %s" % (variation.name, sample.full_name, variation.filelist) - try: - print_filelists(sc_hbsm.samples) - except IOError: - print "Missing filelists, generating them" - sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) - print_filelists(sc_hbsm.samples) +# later # -- tested: ok for both eos and disk +# later sc_hbsm = HbsmSampleCatalogue() +# later # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) +# later sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm_test/hbsm.txt')) # test just on +# later sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) +# later input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', +# later base_input_dir='/tmp/gerbaudo/rucio') +# later input_from_eos = RucioEosCernInterface() +# 
later     def print_filelists(samples):
+# later         for sample in samples:
+# later             for systematic in sample.systematic_uncertainties:
+# later                 for variation in [v for v in systematic.variations if v.name=='nominal']:
+# later                     print "%s %s : %s" % (variation.name, sample.full_name, variation.filelist)
+# later     try:
+# later         print_filelists(sc_hbsm.samples)
+# later     except IOError:
+# later         print "Missing filelists, generating them"
+# later         sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos)
+# later         print_filelists(sc_hbsm.samples)
--
GitLab

From a1d6e07d52713a9ccf5ab12945ea14b80ebf11a5 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Mon, 17 Oct 2016 15:50:18 +0200
Subject: [PATCH 12/35] create tar when necessary; add --overwrite-tar option
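
A sketch of the intended usage (the job label below is only an example):

    python/hbsm_submit.py -l test_2016-10-17 --overwrite-tar

Without the flag an existing tar file is reused; with the flag the tar
file is re-created before the batch files are prepared.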
---
 python/hbsm_submit.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py
index dda8876..f8b4e56 100755
--- a/python/hbsm_submit.py
+++ b/python/hbsm_submit.py
@@ -66,6 +66,7 @@ def main():
     parser.add_option('--lxbatch', action='store_true', help='lxbatch rather than at3')
     parser.add_option('--batch-template', help='batch template, default %s or %s'%(default_at3_batch_template, default_lx_batch_template))
     parser.add_option('--tarfile', default=None, help='the tar file will contain the code')
+    parser.add_option('--overwrite-tar', action='store_true', help='re-create tar even when it exists')
     parser.add_option('--output-dir', default='batch/output/', help='output base directory (will contain job subdirectories), default %default')
     parser.add_option('-v', '--verbose', action='store_true', help='print what it is doing')
     parser.add_option('-d', '--debug', action='store_true', help='print even more debugging information')
@@ -140,10 +141,10 @@ def prepare_batch_files(opts=None, template_filename=None, samples=[]):
     absolute_output_base_dir = base_directory()
     tar_file = opts.tarfile if opts.tarfile else "%s/%s/packages.tgz" % (base_directory(), batch_dir)
     tar_file = os.path.abspath(tar_file)
-    if not os.path.exists(tar_file):
+    if not os.path.exists(tar_file) or opts.overwrite_tar:
         prepare_tar_file(tar_file_name=tar_file, verbose=opts.verbose)
-    # TODO : overwrite option
-    # TODO : for lxbatch might need additional job option parameters
+    elif opts.verbose:
+        print "Using existing tar file: %s"%tar_file
     batch_filenames = []
     if opts.debug:
@@ -228,9 +229,18 @@ def submit_job(batch_file=None, opts=None, lxbatch=False, at3=False):
             print out['stderr']

 def prepare_tar_file(tar_file_name=None, verbose=None):
-    cmd = "tar czf %s BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis" % tar_file_name
-    cmd = cmd+" --exclude='.svn' --exclude='*.o' --exclude='*.so'"
-    if not os.path.exists(tar_file_name):
-        raise RuntimeError("Missing tar file; please create it with:\n\n"+cmd)
+    "create tar; behaves as GNU 'tar', i.e. by default it overwrites existing files"
+    cmd = "tar czf %s " % tar_file_name
+    cmd += " BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis"
+    cmd += " --exclude='.svn' --exclude='.git' --exclude='*.o' --exclude='*.so'"
+    out = get_command_output(cmd)
+    if out['returncode']!=0:
+        print "Failed to create tar file %s" % tar_file_name
+        print out['stderr']
+        print out['stdout']
+    elif verbose:
+        print out['stderr']
+        print out['stdout']
+        print "created tar file %s" % tar_file_name

 if __name__=='__main__':
     main()
--
GitLab

From 0a717e13c07ad010cfabccdfc6346e5fea15d531 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Mon, 17 Oct 2016 17:20:59 +0200
Subject: [PATCH 13/35] Fix catalogues (after breaking them in base and derived classes)

Details:
- make output_directory a member of the catalogue
- change a few staticmethod to classmethod
- determine_group_from_name: add forgotten 'return'
- put uncategorised samples in groups/uncategorised.txt
---
 python/sample_catalogues.py | 95 ++++++++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 29 deletions(-)

diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 2d0705c..8b0e822 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -25,6 +25,31 @@ organize them in different ways (e.g. special treatment of signal
 samples, systematic samples, etc.). This is implemented by
 specializing SampleCatalogue.

+Example usage:
+
+# Step 0
+# If you start from a 'production sample list' --> build group files
+# > sc = VlqSampleCatalogue() # or HbsmSampleCatalogue
+# > sc.verbose = True
+# > sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt')
+# > sc.categorise_samples(sc.samples)
+# > sc.write_group_files()
+
+# Step 1
+# Otherwise start from existing group files
+# > sc.add_samples_from_group_files(glob.glob(sc.groupfiles_directory+'/*.txt'))
+
+# Step 2
+# Write filelists; the input can be either a local dir or a storage interface (eos, pnfs, ...)
+# > input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist',
+# >                                     base_input_dir='/tmp/gerbaudo/rucio')
+# > input_from_eos = RucioEosCernInterface()
+# > sc.add_filelists(samples=sc.samples, input_interface=input_from_eos)
+
+# Note that the filelists are attached to each 'Variation' (not to each 'Sample').
+# Now you can start processing the samples.
+# See instructions in hbsm_submit.py
+
 TODO : implement special treatment of systematic samples.

 TODO : speedup the generation of filelists?
(rucio can take hrs w/out multiprocessing, local disk is fine) @@ -66,23 +91,22 @@ class Sample(object): return "Sample :"+ ', '.join("%s : '%s'" % (a, va) for a, va in [(pa, getattr(self, pa)) for pa in plain_attrs] if va) @property - def dsid(self): + def dsid(self, verbose=False): "No need to input this by hand, just extract it from full_name" - return utils.guess_id_from_name(samplename=self.full_name) + return utils.guess_id_from_name(samplename=self.full_name, verbose=verbose) @property def group(self): return self._group @group.setter - def group(self, value, overwrite_group=False): + def group(self, value): "this attribute is set by SampleCatalogue so we need to check it and cache it" if not self._group or self._group==value: self._group = value - elif overwrite_group: - self._group = value else: - pass + print "overwriting sample group from %s to %s" % (self._group, value) + self._group = value def use_all_uncertainties(self): self.systematic_uncertainties = catalogue.all_uncertainties() @@ -120,6 +144,7 @@ class SampleCatalogue(object): def __init__(self): self.samples = [] self.verbose = False + self.groupfiles_directory = 'VLQAnalysis/data/groups' # should be overwritten by specific catalogue def add_samples_from_group_files(self, paths=[]): """This is the usual method to populate the catalogue, from @@ -139,25 +164,30 @@ class SampleCatalogue(object): """ self.samples += [Sample(full_name=l, group=group) for l in SampleCatalogue.read_lines_from_txt(path)] - def write_group_files(self, output_directory=None, allow_uncategorised_samples=True): + def write_group_files(self, allow_uncategorised_samples=True): """After having called 'categorise_samples', you can write the samples organised in group files. Alternatively, you can also just write your group files by hand. """ - if self.has_uncategorised_samples: - print "There are samples that do not belong to any group. The group files will not be complete." + num_uncategorised_samples = len(self.uncategorised_samples) + if num_uncategorised_samples: + print ("Warning: there are %d samples that do not belong to any group." % num_uncategorised_samples + +" Please check 'uncategorised.txt'") if not allow_uncategorised_samples: uncategorised = [s for s in self.samples if not s.group] raise NotImplementedError("Do not know how to handle uncategorised samples:\n"+ '\n'.join(s.full_name for s in uncategorised)) - if not os.path.isdir(output_directory): - raise IOError("'%s' is not a valid directory" % output_directory) + utils.mkdir_if_needed(self.groupfiles_directory) samples_per_group = collections.defaultdict(list) + samples_per_group['uncategorised'] = [] for s in self.samples: if not s.group: - continue - samples_per_group[s.group].append(s) + samples_per_group['uncategorised'].append(s) + else: + samples_per_group[s.group].append(s) + if not samples_per_group['uncategorised']: + samples_per_group.pop('uncategorised', None) for group, samples in samples_per_group.items(): - filename = os.path.join(output_directory, group+'.txt') + filename = os.path.join(self.groupfiles_directory, group+'.txt') with open(filename, 'w') as output_file: output_file.write('\n'.join(s.full_name for s in samples)) if self.verbose: @@ -187,8 +217,8 @@ class SampleCatalogue(object): # systematic samples? 
@property - def has_uncategorised_samples(self): - return any(not s.group for s in self.samples) + def uncategorised_samples(self): + return [s for s in self.samples if not s.group] @property def groups(self): @@ -197,17 +227,13 @@ class SampleCatalogue(object): def samples_from_group(self, group=''): return [s for s in self.samples if s.group==group] - @classmethod - def categorise_all_samples(cls, samples, overwrite_group=False, verbose=False): - raise NotImplementedError("This method should be implemented in the analysis-specific sub-classes") - @classmethod def determine_group_from_name(cls, sample=None): - """Determine the group of this sample from its. + """Determine the group of this sample from its name. This is where the analysis-specific catalogues can implement their categorisation. """ - return ('data' if cls.is_data(sample) else + group = ('data' if cls.is_data(sample) else 'ttbar' if cls.is_ttbar(sample) else 'wjets' if cls.is_wjets(sample) else 'zjets' if cls.is_zjets(sample) else @@ -217,6 +243,7 @@ class SampleCatalogue(object): 'tth' if cls.is_tth(sample) else 'fourtop' if cls.is_fourtop(sample) else None) + return group @staticmethod def is_data(sample): @@ -304,6 +331,11 @@ class SampleCatalogue(object): class VlqSampleCatalogue(SampleCatalogue): "Catalogue with features that are specific to the VLQ analysis" + + def __init__(self): + super(VlqSampleCatalogue, self).__init__() + self.groupfiles_directory = 'VLQAnalysis/data/groups/vlq' + @classmethod def determine_group_from_name(cls, sample=None): """Determine the group of this sample from its. @@ -314,6 +346,7 @@ class VlqSampleCatalogue(SampleCatalogue): 'uerdpp' if cls.is_uerdpp(sample) else 'fourtopci' if cls.is_fourtopci(sample) else SampleCatalogue.determine_group_from_name(sample)) + return group @classmethod def categorise_samples(cls, samples, overwrite_group=False, verbose=False): @@ -344,7 +377,7 @@ class VlqSampleCatalogue(SampleCatalogue): def is_fourtopci(sample): return any(str(dsid) in sample.full_name for dsid in [302777]) - @staticmethod + @classmethod def vlq_short_name(cls, sample=None): dsid = int(sample.dsid) return ('VLQ_TT_600' if dsid==302469 else @@ -363,7 +396,7 @@ class VlqSampleCatalogue(SampleCatalogue): 'VLQ_TT_1400' if dsid==302482 else None) - @staticmethod + @classmethod def uerdpp_short_name(cls, sample=None): dsid = int(sample.dsid) return ('UEDRPP_1000' if dsid==302055 else @@ -377,16 +410,20 @@ class VlqSampleCatalogue(SampleCatalogue): class HbsmSampleCatalogue(SampleCatalogue): "Catalogue with features that are specific to the HBSM analysis" + + def __init__(self): + super(HbsmSampleCatalogue, self).__init__() + self.groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' + @classmethod def determine_group_from_name(cls, sample=None): """Determine the group of this sample from its. This is where the analysis-specific catalogues can implement their categorisation. 
""" - group = ('vlq' if cls.is_vlq(sample) else - 'uerdpp' if cls.is_uerdpp(sample) else - 'fourtopci' if cls.is_fourtopci(sample) else + group = ('hbsm' if cls.is_hbsm(sample) else SampleCatalogue.determine_group_from_name(sample)) + return group @classmethod def categorise_samples(cls, samples, overwrite_group=False, verbose=False): @@ -695,13 +732,13 @@ if __name__=='__main__': # print '\n'.join(s.full_name for s in uncategorised_samples) # -- tested: ok (go from one list to group files and back) - groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' sc = HbsmSampleCatalogue() sc = VlqSampleCatalogue() sc.verbose = True sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt') sc.categorise_samples(sc.samples) # only for specialised catalogues - sc.write_group_files(output_directory=groupfiles_directory) + sc.write_group_files() + # sc2 = SampleCatalogue() # sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt')) # print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) -- GitLab From 1325fb740531d25eeeeb48e56d8fbf64043719dd Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Mon, 17 Oct 2016 17:54:54 +0200 Subject: [PATCH 14/35] add Sample.job_options Details: This is where we can specify sample-specific job options. The job options can be specified in the text files; they affect all the lines following the keyword 'config:'. The keyword is the same one used in MultibjetsAnalsysis. --- python/sample_catalogues.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py index 8b0e822..318dc40 100644 --- a/python/sample_catalogues.py +++ b/python/sample_catalogues.py @@ -46,7 +46,9 @@ Example usage: # > input_from_eos = RucioEosCernInterface() # > sc.add_filelists(samples=sc.samples, input_interface=input_from_eos) -# Note that the filelists are attached to each 'Variation' (not to each 'Sample'). +# Note1: the filelists are attached to each 'Variation' (not to each 'Sample'). +# Note2: sample-specific job options can be inserted in the txt files +# (as in MultibjetsAnalysis, with the '# config:' comment) # Now you can start processing the samples. # See instructions in hbsm_submit.py @@ -76,10 +78,14 @@ class Sample(object): """Holds info about a sample and its attributes. 'full_name' is always there; the other attributes are filled as needed. """ - def __init__(self, short_name=None, full_name=None, group=None, filelist_dir=None, filelist_file=None, ds_input=None): + def __init__(self, short_name=None, full_name=None, group=None, + job_options=None, + filelist_dir=None, filelist_file=None, + ds_input=None): self.short_name = short_name self.full_name = full_name self._group = group + self.job_options = job_options self.ds_input = ds_input self.systematic_uncertainties = [] if not full_name: @@ -138,7 +144,6 @@ class SampleCatalogue(object): In general one builds the catalogue from the group files. TODO attach syst uncertainties to samples - TODO attach """ def __init__(self): @@ -162,7 +167,13 @@ class SampleCatalogue(object): new production. path should be the file 'datasets_to_download.txt' generated from HtX4TopsNtuple. 
""" - self.samples += [Sample(full_name=l, group=group) for l in SampleCatalogue.read_lines_from_txt(path)] + job_options = None + keyword_job_option = 'config:' # same convention as in MultibjetsAnalysis + for line in SampleCatalogue.read_lines_from_txt(path, keywords_useful_comment_line=[keyword_job_option]): + if keyword_job_option in line: + job_options = line[line.find(keyword_job_option)+len(keyword_job_option):].strip() + continue # will affect all upcoming samples + self.samples.append(Sample(full_name=line, group=group, job_options=job_options)) def write_group_files(self, allow_uncategorised_samples=True): """After having called 'categorise_samples', you can write the samples organised in group files. @@ -288,14 +299,16 @@ class SampleCatalogue(object): return any(str(dsid) in sample.full_name for dsid in [410080]) @staticmethod - def read_lines_from_txt(txt_filename): + def read_lines_from_txt(txt_filename, keywords_useful_comment_line=[]): "parse a file dropping comment and empty lines" def is_comment_or_empty(l): l = l.strip() return not l or l.startswith('#') lines = [] with open(txt_filename) as f: - lines = [l.strip() for l in f.readlines() if not is_comment_or_empty(l)] + lines = [l.strip() for l in f.readlines() + if not is_comment_or_empty(l) + or any(k in l for k in keywords_useful_comment_line)] return lines def write_script_to_generate_rucio_eos_lists(self, script_filename='generate_eos_filelists.sh', @@ -738,10 +751,9 @@ if __name__=='__main__': sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt') sc.categorise_samples(sc.samples) # only for specialised catalogues sc.write_group_files() - - # sc2 = SampleCatalogue() - # sc2.add_samples_from_group_files(glob.glob(groupfiles_directory+'/*.txt')) - # print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) + sc2 = SampleCatalogue() + sc2.add_samples_from_group_files(glob.glob(sc.groupfiles_directory+'/*.txt')) + print "%d samples from production file, and %d samples from group files" % (len(sc.samples), len(sc2.samples)) # -- tested: ok (also the ttbar ht + hf splitting) # groupfiles_directory = 'VLQAnalysis/data/groups/hbsm' -- GitLab From 76aef4a1e3777a9a373f264ff7adbd9b290c4f1b Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Tue, 18 Oct 2016 18:34:01 +0200 Subject: [PATCH 15/35] create tar when needed (amends a1d6e07) The commit a1d6e07 was made on hbsm_sumbit.py, but it really should have been on batch_utils.py. I had forgotten about this draft code. 
---
 python/batch_utils.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index 9ad2ead..e4a18a9 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -75,9 +75,9 @@ class JobManager(object):
         self._template_contents = None # cached
         self.verbose = verbose
         self.debug = debug
+        self.overwrite_tar = False
         self.create_directories()

-
     def create_job(self, sample, systematic, variation, template_path=None):
         "This will need access to several specialised attributes (template, dirs, etc.)"
         raise NotImplementedError("create_job should be implemented in each specialised JobManager class")
@@ -157,11 +157,26 @@ class JobManager(object):
     def default_tar_file(self):
         return base_directory()+'/'+self.relative_input_directory+'/packages.tgz'
     def check_tar_file(self, path):
-        if not os.path.exists(path):
-            cmd = "tar czf %s " % path
-            cmd += " BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis"
-            cmd += " --exclude='.svn' --exclude='*.o' --exclude='*.so'"
-            raise RuntimeError("Missing tar file; please create it with:\n\n"+cmd)
+        if not os.path.exists(path) or self.overwrite_tar:
+            self.prepare_tar_file(tar_file_name=path)
+        elif self.verbose:
+            print "Using existing tar file: %s"%path
+
+    def prepare_tar_file(self, tar_file_name=None):
+        "create tar; behaves as GNU 'tar', i.e. by default it overwrites existing files"
+        cmd = "tar czf %s " % tar_file_name
+        cmd += " BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis"
+        cmd += " --exclude='.svn' --exclude='.git' --exclude='*.o' --exclude='*.so'"
+        out = utils.get_command_output(cmd)
+        if out['returncode']!=0:
+            print "Failed to create tar file %s" % tar_file_name
+            print out['stderr']
+            print out['stdout']
+        elif self.verbose:
+            print out['stderr']
+            print out['stdout']
+            print "Created tar file %s" % tar_file_name
+
     @property
     def tar_file(self):
         if not self._tar_file: # set value on first call (and check existence if necessary)
--
GitLab

From 1b7ba8d19d904345a5b42fb3ddcb37e54efff528 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Tue, 18 Oct 2016 18:41:00 +0200
Subject: [PATCH 16/35] Update templates with correct output directories

Details:
- rename absolute_output_base_dir -> absolute_base_dir
- add to templates input/log/output/status dirs
- add At3JobManager.job_submission_command
- templates: move .fail/.done to the end of the filename
---
 data/hbsm/batch/templates/cern/one_lep_nom.sh | 25 ++++++------
 data/hbsm/batch/templates/ifae/one_lep_nom.sh | 23 ++++++-----
 python/batch_utils.py                         | 38 ++++++++++++-------
 3 files changed, 51 insertions(+), 35 deletions(-)

diff --git a/data/hbsm/batch/templates/cern/one_lep_nom.sh b/data/hbsm/batch/templates/cern/one_lep_nom.sh
index c024d36..03b2b5b 100644
--- a/data/hbsm/batch/templates/cern/one_lep_nom.sh
+++ b/data/hbsm/batch/templates/cern/one_lep_nom.sh
@@ -13,7 +13,7 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
 source ${{ATLAS_LOCAL_ROOT_BASE}}/user/atlasLocalSetup.sh

 function abortJob {{
-    touch "{absolute_output_base_dir:s}/{relative_output_dir:s}/fail_{job_label:s}.${{LSB_JOBID}}"
+    touch "{absolute_base_dir:s}/{relative_status_dir:s}/fail_{job_label:s}.${{LSB_JOBID}}"
     exit 1
 }}

@@ -21,7 +21,7 @@ function main() {{

     local tar_file="{tar_file:s}"
     local relative_output_dir="{relative_output_dir:s}"
-    local absolute_output_base_dir="{absolute_output_base_dir:s}"
+    local absolute_base_dir="{absolute_base_dir:s}"

     local
exe="VLQAnalysis" @@ -46,11 +46,12 @@ function main() {{ # echo "contents of tarball (./all_links/):" # ls -ltrh ./all_links/ + mkdir -p ${{relative_log_dir}} mkdir -p ${{relative_output_dir}} echo "Processing ${{sample}} `date`" - subtask_log_file=${{relative_output_dir}}/compile_{job_label:s}.log + subtask_log_file=${{relative_log_dir}}/compile_{job_label:s}.log echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1 # rcSetup Base,2.3.50 >> ${{subtask_log_file}} 2>&1 lsetup 'rcSetup -u' >> ${{subtask_log_file}} 2>&1 @@ -64,7 +65,7 @@ function main() {{ # --msgLevel=DEBUG \ # for test # --nEvents=10 \ - subtask_log_file=${{relative_output_dir}}/run_{job_label:s}.log + subtask_log_file=${{relative_log_dir}}/run_{job_label:s}.log echo "Starting 'run' step `date`" >> ${{subtask_log_file}} 2>&1 ${{exe}} \ --outputFile={relative_output_dir:}/{output_file:s} \ @@ -95,21 +96,23 @@ function main() {{ >> ${{subtask_log_file}} 2>&1 echo "Finishing 'run' step `date`" >> ${{subtask_log_file}} 2>&1 - if test -e {relative_output_dir:}/{output_file:s}.root + if test -e {relative_output_dir:}/{output_file:s} then - touch ${{relative_output_dir}}/done_{job_label:s}.${{LSB_JOBID}} + echo "files being copied: [begin]" + ls -ltrh ${{relative_output_dir}}/* + ls -ltrh ${{relative_log_dir}}/* + echo "files being copied: [end]" + rsync -az ${{relative_output_dir}}/* ${{absolute_base_dir}}/${{relative_output_dir}} + rsync -az ${{relative_log_dir}}/* ${{absolute_base_dir}}/${{relative_log_dir}} + touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.${{LSB_JOBID}}.done" fi - echo "files being copied: [begin]" - ls -ltrh ${{relative_output_dir}}/* - echo "files being copied: [begin]" - rsync -az ${{relative_output_dir}}/* ${{absolute_output_base_dir}}/${{relative_output_dir}} echo "Processed ${{sample}} `date`" # echo "Cleaning up" # cd .. 
# rm -rf $TMPDIR - test -f "${{absolute_output_base_dir}}/${{relative_output_dir}}/done_{job_label:s}.${{LSB_JOBID}}" || abortJob + test -f "${{absolute_base_dir}}/${{relative_output_dir}}/{job_label:s}.${{LSB_JOBID}}.done" || abortJob }} main diff --git a/data/hbsm/batch/templates/ifae/one_lep_nom.sh b/data/hbsm/batch/templates/ifae/one_lep_nom.sh index ecd7ce3..a4c26b0 100644 --- a/data/hbsm/batch/templates/ifae/one_lep_nom.sh +++ b/data/hbsm/batch/templates/ifae/one_lep_nom.sh @@ -13,7 +13,7 @@ export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase source ${{ATLAS_LOCAL_ROOT_BASE}}/user/atlasLocalSetup.sh function abortJob {{ - touch "{absolute_output_base_dir:s}/{relative_output_dir:s}/fail_{job_label:s}.${{LSB_JOBID}}" + touch "{absolute_base_dir:s}/{relative_output_dir:s}/{job_label:s}.${{LSB_JOBID}}.fail" exit 1 }} @@ -21,7 +21,7 @@ function main() {{ local tar_file="{tar_file:s}" local relative_output_dir="{relative_output_dir:s}" - local absolute_output_base_dir="{absolute_output_base_dir:s}" + local absolute_base_dir="{absolute_base_dir:s}" local exe="VLQAnalysis" @@ -40,11 +40,12 @@ function main() {{ # echo "contents of tarball (./all_links/):" # ls -ltrh ./all_links/ + mkdir -p ${{relative_log_dir}} mkdir -p ${{relative_output_dir}} echo "Processing ${{sample}} `date`" - subtask_log_file=${{relative_output_dir}}/compile_{job_label:s}.log + subtask_log_file=${{relative_log_dir}}/compile_{job_label:s}.log echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1 # rcSetup Base,2.3.50 >> ${{subtask_log_file}} 2>&1 lsetup 'rcSetup -u' >> ${{subtask_log_file}} 2>&1 @@ -57,7 +58,7 @@ function main() {{ # --msgLevel=DEBUG \ # for test # --nEvents=10 \ - subtask_log_file=${{relative_output_dir}}/run_{job_label:s}.log + subtask_log_file=${{relative_log_dir}}/run_{job_label:s}.log echo "Starting 'run' step `date`" >> ${{subtask_log_file}} 2>&1 ${{exe}} \ --outputFile={relative_output_dir:}/{output_file:s} \ @@ -90,18 +91,20 @@ function main() {{ echo "Finishing 'run' step `date`" >> ${{subtask_log_file}} 2>&1 if test -e {relative_output_dir:}/{output_file:s} then - touch ${{relative_output_dir}}/done_{job_label:s}.${{LSB_JOBID}} + echo "files being copied: [begin]" + ls -ltrh ${{relative_output_dir}}/* + ls -ltrh ${{relative_log_dir}}/* + echo "files being copied: [end]" + rsync -az ${{relative_output_dir}}/* ${{absolute_base_dir}}/${{relative_output_dir}} + rsync -az ${{relative_log_dir}}/* ${{absolute_base_dir}}/${{relative_log_dir}} + touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.${{LSB_JOBID}}.done" fi - echo "files being copied: [begin]" - ls -ltrh ${{relative_output_dir}}/* - echo "files being copied: [begin]" - rsync -az ${{relative_output_dir}}/* ${{absolute_output_base_dir}}/${{relative_output_dir}} echo "Processed ${{sample}} `date`" echo "Cleaning up" rm -rf $TMPDIR/* - test -f "${{absolute_output_base_dir}}/${{relative_output_dir}}/done_{job_label:s}.${{LSB_JOBID}}" || abortJob + test -f "${{absolute_base_dir}}/${{relative_output_dir}}/{job_label:s}.${{LSB_JOBID}}.done" || abortJob }} diff --git a/python/batch_utils.py b/python/batch_utils.py index e4a18a9..9679511 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -67,7 +67,7 @@ class JobManager(object): self.jobset = jobset_label self.queues = [] self._queue = None - self.absolute_output_base_dir = base_directory() + self.absolute_base_dir = base_directory() self.dry_run = True self.jobs = [] self._tar_file = None @@ -96,8 +96,11 @@ class 
JobManager(object): job_label = self.job_label(sample_name=sample_name, variation_name=variation.name) parameters = {'sample_name' : sample_name, 'tar_file' : self.tar_file, + 'absolute_base_dir' : self.absolute_base_dir, + 'relative_input_dir' : self.relative_input_directory, + 'relative_log_dir' : self.relative_log_directory, 'relative_output_dir' : self.relative_output_directory, - 'absolute_output_base_dir' : self.absolute_output_base_dir, + 'relative_status_dir' : self.relative_status_directory, 'filelist_name' : variation.filelist, 'input_tree' : variation.input_tree, 'output_file' : (sample_name+'.root' if is_nominal else @@ -217,17 +220,16 @@ class LxbJobManager(JobManager): self.queues = ['8nm', '1nh', '8nh', '1nd', '2nd', '1nw', '2nw', 'test'] # bqueues -u ${USER} self.template_path = 'VLQAnalysis/data/hbsm/batch/templates/cern/one_lep_nom.sh' def job_submission_command(self, queue=None, verbose=None, base_dir=None, job=None): - short_batch_file = job.short_batch_file - cmd = (" bsub " - +" -L /bin/bash " # reset shell - +" -q %s " % queue - # not sure this is working - # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file) - +" -J %s " % short_batch_file - +" -o %s.oe " % (self.relative_output_directory+'/'+short_batch_file) - +" %s" % os.path.join(base_dir, job.script_path) - ) - return cmd + cmd = (" bsub " + +" -L /bin/bash " # reset shell + +" -q %s " % queue + # not sure this is working + # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file) + +" -J %s " % job.short_batch_file + +" -o %s.oe " % (self.relative_output_directory+'/'+job.short_batch_file) + +" %s" % os.path.join(base_dir, job.script_path) + ) + return cmd def create_job(self, sample, systematic, variation, template_path=None): self.generic_create_job(sample, systematic, variation, template_path) @@ -243,6 +245,14 @@ class At3JobManager(JobManager): self.template_path = 'VLQAnalysis/data/hbsm/batch/templates/ifae/one_lep_nom.sh' def create_job(self, sample, systematic, variation, template_path=None): self.generic_create_job(sample, systematic, variation, template_path) + def job_submission_command(self, queue=None, verbose=None, base_dir=None, job=None): + cmd = (" qsub " + +" -j oe " # join stdout and stderr + +" -o %s.oe " % (self.relative_output_directory+'/'+job.short_batch_file) + +" -q %s " % queue + +" %s" % os.path.join(base_dir, job.script_path) + ) + return cmd #___________________________________________________________ @@ -252,7 +262,7 @@ if __name__=='__main__': sc_hbsm = HbsmSampleCatalogue() # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) - sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm_test/hbsm.txt')) # test just on + sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/hbsm.txt')) sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', base_input_dir='/tmp/gerbaudo/rucio') -- GitLab From 158e22af1e167990625e7a72d7a1d0e11d93ebc3 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 19 Oct 2016 11:06:14 +0200 Subject: [PATCH 17/35] improve templates Details: - split in two functions prepare/run: will make it easier to join samples - explicit variables (easier join) - log files to log directory (not to output one) - add `|| true` after `VLQAnalysis` command, so that we can write the status file even when it crashes - tested on 
lxbatch (not on at3)
---
 data/hbsm/batch/templates/cern/one_lep_nom.sh | 124 +++++++++---------
 data/hbsm/batch/templates/ifae/one_lep_nom.sh | 109 ++++++++-------
 python/batch_utils.py                         |   8 +-
 3 files changed, 122 insertions(+), 119 deletions(-)

diff --git a/data/hbsm/batch/templates/cern/one_lep_nom.sh b/data/hbsm/batch/templates/cern/one_lep_nom.sh
index 03b2b5b..8ac1600 100644
--- a/data/hbsm/batch/templates/cern/one_lep_nom.sh
+++ b/data/hbsm/batch/templates/cern/one_lep_nom.sh
@@ -1,10 +1,22 @@
 #!/bin/bash

-#BSUB -o test.%J
-
 # template to submit VLQ jobs on the lxbatch cluster
 #
-# note to self:
+# Note to self:
+# if you want to use curly braces to highlight bash variables, you
+# need to escape them with double curly (otherwise they're picked up
+# by the python string.format
+#
+# Variable replacement strategy:
+# - replace variables as early as possible, to make commands in
+#   resulting scripts explicit (--> more easily 'joinable')
+# - when there is a value repeated several times, declare a local
+#   variable within the function
+# - assume minimal env vars (i.e. only LSB_JOBID)
+# Log file strategy:
+# - keep separate log files for compilation and processing, since we
+#   might want to join samples
+# - keep explicit logfile names within each function (easier joining)
 # if you want to use curly braces to highlight bash variables, you
 # need to escape them with double curly (otherwise they're picked up
 # by the python string.format
@@ -12,66 +24,37 @@
 export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
 source ${{ATLAS_LOCAL_ROOT_BASE}}/user/atlasLocalSetup.sh

-function abortJob {{
-    touch "{absolute_base_dir:s}/{relative_status_dir:s}/fail_{job_label:s}.${{LSB_JOBID}}"
-    exit 1
-}}
-
-function main() {{
-    local tar_file="{tar_file:s}"
-    local relative_output_dir="{relative_output_dir:s}"
+function prepare() {{
     local absolute_base_dir="{absolute_base_dir:s}"
-
-    local exe="VLQAnalysis"
-
-    local subtask_log_file=""
-
-    local tmp_dir=${{TMPDIR}}/LSF_${{LSB_JOBID}}
-
-    mkdir -vp ${{tmp_dir}}
-    cd ${{tmp_dir}}
-
-    echo "Start `date`"
-    echo "working from `pwd`"
-    echo "current contents:"
+    echo "Current directory: `pwd`"
+    echo "Current contents:"
     ls -ltrh
-    echo "using tar file ${{tar_file}}"
-    ls ${{tar_file}}
-    cp -p ${{tar_file}} ./tarball.tgz
+    cp -p {tar_file:s} ./tarball.tgz
     echo "untar tarball.tgz"
-    ls tarball.tgz
-
+    ls -lh tarball.tgz
     tar xzf tarball.tgz
-
     # echo "contents of tarball (./all_links/):"
     # ls -ltrh ./all_links/
-
-    mkdir -p ${{relative_log_dir}}
-    mkdir -p ${{relative_output_dir}}
-
-    echo "Processing ${{sample}} `date`"
-
-    subtask_log_file=${{relative_log_dir}}/compile_{job_label:s}.log
+    mkdir -p {relative_log_dir:s}
+    mkdir -p {relative_output_dir:s}
+    local subtask_log_file={relative_log_dir:s}/compile_{job_label:s}.log
     echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1
-    # rcSetup Base,2.3.50 >> ${{subtask_log_file}} 2>&1
-    lsetup 'rcSetup -u' >> ${{subtask_log_file}} 2>&1
-    lsetup 'rcsetup Base,2.4.14' >> ${{subtask_log_file}} 2>&1
     rc find_packages >> ${{subtask_log_file}} 2>&1
     rc clean >> ${{subtask_log_file}} 2>&1
     rc compile >> ${{subtask_log_file}} 2>&1
-    echo "Finishing 'compile' step `date`" >> ${{subtask_log_file}} 2>&1
+    echo "Completed 'compile' step `date`" >> ${{subtask_log_file}} 2>&1
+    ls -ltrh {relative_log_dir:s}/*
+    rsync -az {relative_log_dir:s}/* {absolute_base_dir:s}/{relative_log_dir:s}
+}}

-# can lead to large logfiles:
-    # --msgLevel=DEBUG \
-# for test
-    subtask_log_file=${{relative_log_dir}}/run_{job_label:s}.log
-    echo "Starting 'run' step `date`" >> ${{subtask_log_file}} 2>&1
-    ${{exe}} \
-    --outputFile={relative_output_dir:}/{output_file:s} \
+function run() {{
+    echo "Processing {sample_name:s} `date`"
+    subtask_log_file={relative_log_dir:s}/run_{job_label:s}.log
+    VLQAnalysis \
+    --outputFile={relative_output_dir:s}/{output_file:s} \
     --inputFile={filelist_name:s} \
     --textFileList=true \
-    --sampleName=hbsm \
+    --sampleName={sample_name:s} \
     --weightConfigs=${{ROOTCOREBIN}}/data/VLQAnalysis/list_weights.list \
     --doOneLeptonAna=true \
     --useLeptonsSF=true \
@@ -93,26 +76,45 @@ function main() {{
     --isData=false \
     --computeWeightSys={compute_weight_sys:s} \
     --onlyDumpSystHistograms=true \
-    >> ${{subtask_log_file}} 2>&1
+    >> ${{subtask_log_file}} 2>&1 || true
+    echo "Completed 'run' step `date`" >> ${{subtask_log_file}} 2>&1

-    echo "Finishing 'run' step `date`" >> ${{subtask_log_file}} 2>&1
-    if test -e {relative_output_dir:}/{output_file:s}
+# These options are only for tests
+#    --msgLevel=DEBUG \ # can lead to large logfiles
+#    --nEvents=10 \ # for test
+
+    if test -e {relative_output_dir:s}/{output_file:s}
     then
         echo "files being copied: [begin]"
-        ls -ltrh ${{relative_output_dir}}/*
-        ls -ltrh ${{relative_log_dir}}/*
+        ls -ltrh {relative_output_dir:s}/*
         echo "files being copied: [end]"
-        rsync -az ${{relative_output_dir}}/* ${{absolute_base_dir}}/${{relative_output_dir}}
-        rsync -az ${{relative_log_dir}}/* ${{absolute_base_dir}}/${{relative_log_dir}}
-        touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.${{LSB_JOBID}}.done"
+        rsync -az {relative_log_dir:s}/* {absolute_base_dir:s}/{relative_log_dir:s}
+        rsync -az {relative_output_dir:s}/* {absolute_base_dir:s}/{relative_output_dir:s}
+        touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.done.${{LSB_JOBID}}"
+    else
+        touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.fail.${{LSB_JOBID}}"
     fi
-    echo "Processed ${{sample}} `date`"
+    echo "Processed {sample_name:s} `date`"
+
+}}

-    # echo "Cleaning up"
+function main() {{
+
+    local relative_output_dir="{relative_output_dir:s}"
+    local absolute_base_dir="{absolute_base_dir:s}"
+    echo "Start `date`"
+    local tmp_dir=${{TMPDIR}}/LSF_${{LSB_JOBID}}
+    mkdir -vp ${{tmp_dir}}
+    cd ${{tmp_dir}}
+    echo "working from `pwd`"
+    lsetup 'rcSetup -u'
+    lsetup 'rcsetup Base,2.4.14'
+    prepare
+    run
+    # echo "Cleaning up" # done automatically on lxbatch
     # cd ..
     # rm -rf $TMPDIR
-    test -f "${{absolute_base_dir}}/${{relative_output_dir}}/{job_label:s}.${{LSB_JOBID}}.done" || abortJob
 }}

 main
diff --git a/data/hbsm/batch/templates/ifae/one_lep_nom.sh b/data/hbsm/batch/templates/ifae/one_lep_nom.sh
index a4c26b0..23d440a 100644
--- a/data/hbsm/batch/templates/ifae/one_lep_nom.sh
+++ b/data/hbsm/batch/templates/ifae/one_lep_nom.sh
@@ -2,69 +2,56 @@
 # template to submit VLQ jobs on the at3 cluster
 #
-# note to self:
+# Note to self:
 # if you want to use curly braces to highlight bash variables, you
 # need to escape them with double curly (otherwise they're picked up
 # by the python string.format
+#
+# Variable replacement strategy:
+# - replace variables as early as possible, to make commands in
+#   resulting scripts explicit (--> more easily 'joinable')
+# - when there is a value repeated several times, declare a local
+#   variable within the function
+# - assume minimal env vars (i.e.
only LSB_JOBID) +# Log file strategy: +# - keep separate log files for compilation and processing, since we +# might want to join samples +# - keep explicit logfile names within each function (easier joining) echo "setting up root " - # source ${{trex_dir}}/setup.sh # this doesn't work on batch export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase source ${{ATLAS_LOCAL_ROOT_BASE}}/user/atlasLocalSetup.sh -function abortJob {{ - touch "{absolute_base_dir:s}/{relative_output_dir:s}/{job_label:s}.${{LSB_JOBID}}.fail" - exit 1 -}} - -function main() {{ - - local tar_file="{tar_file:s}" - local relative_output_dir="{relative_output_dir:s}" +function prepare() {{ local absolute_base_dir="{absolute_base_dir:s}" - - local exe="VLQAnalysis" - - local subtask_log_file="" - - cd ${{TMPDIR}} - echo "Start `date`" - echo "working from `pwd`" - echo "current contents:" + echo "Current directory: `pwd`" + echo "Current contents:" ls -ltrh - cp -p ${{tar_file}} ./tarball.tgz + cp -p {tar_file:s} ./tarball.tgz echo "untar tarball.tgz" - ls tarball.tgz - + ls -lh tarball.tgz tar xzf tarball.tgz - # echo "contents of tarball (./all_links/):" - # ls -ltrh ./all_links/ - mkdir -p ${{relative_log_dir}} - mkdir -p ${{relative_output_dir}} - - echo "Processing ${{sample}} `date`" - - subtask_log_file=${{relative_log_dir}}/compile_{job_label:s}.log + mkdir -p {relative_log_dir:s} + mkdir -p {relative_output_dir:s} + local subtask_log_file={relative_log_dir:s}/compile_{job_label:s}.log echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1 - # rcSetup Base,2.3.50 >> ${{subtask_log_file}} 2>&1 - lsetup 'rcSetup -u' >> ${{subtask_log_file}} 2>&1 - lsetup 'rcsetup Base,2.3.50' >> ${{subtask_log_file}} 2>&1 rc find_packages >> ${{subtask_log_file}} 2>&1 rc clean >> ${{subtask_log_file}} 2>&1 rc compile >> ${{subtask_log_file}} 2>&1 - echo "Finishing 'compile' step `date`" >> ${{subtask_log_file}} 2>&1 -# can lead to large logfiles: - # --msgLevel=DEBUG \ -# for test - # --nEvents=10 \ - subtask_log_file=${{relative_log_dir}}/run_{job_label:s}.log - echo "Starting 'run' step `date`" >> ${{subtask_log_file}} 2>&1 - ${{exe}} \ - --outputFile={relative_output_dir:}/{output_file:s} \ + echo "Completed 'compile' step `date`" >> ${{subtask_log_file}} 2>&1 + ls -ltrh {relative_log_dir:s}/* + rsync -az {relative_log_dir:s}/* {absolute_base_dir:s}/{relative_log_dir:s} +}} + +function run() {{ + echo "Processing {sample_name:s} `date`" + local subtask_log_file={relative_log_dir:s}/run_{job_label:s}.log + VLQAnalysis \ + --outputFile={relative_output_dir:s}/{output_file:s} \ --inputFile={filelist_name:s} \ --textFileList=true \ - --sampleName=hbsm \ + --sampleName={sample_name:s} \ --weightConfigs=${{ROOTCOREBIN}}/data/VLQAnalysis/list_weights.list \ --doOneLeptonAna=true \ --useLeptonsSF=true \ @@ -86,26 +73,38 @@ function main() {{ --isData=false \ --computeWeightSys={compute_weight_sys:s} \ --onlyDumpSystHistograms=true \ - >> ${{subtask_log_file}} 2>&1 + >> ${{subtask_log_file}} 2>&1 || true + +# These options are only for tests +# --msgLevel=DEBUG \ # can lead to large logfiles +# --nEvents=10 \ # for test - echo "Finishing 'run' step `date`" >> ${{subtask_log_file}} 2>&1 - if test -e {relative_output_dir:}/{output_file:s} + if test -e {relative_output_dir:s}/{output_file:s} then echo "files being copied: [begin]" - ls -ltrh ${{relative_output_dir}}/* - ls -ltrh ${{relative_log_dir}}/* + ls -ltrh {relative_output_dir:s}/* echo "files being copied: [end]" - rsync -az 
${{relative_output_dir}}/* ${{absolute_base_dir}}/${{relative_output_dir}} - rsync -az ${{relative_log_dir}}/* ${{absolute_base_dir}}/${{relative_log_dir}} - touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.${{LSB_JOBID}}.done" + rsync -az {relative_log_dir:s}/* {absolute_base_dir:s}/{relative_log_dir:s} + rsync -az {relative_output_dir:s}/* {absolute_base_dir:s}/{relative_output_dir:s} + touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.done.${{LSB_JOBID}}" + else + touch "{absolute_base_dir:s}/{relative_status_dir:s}/{job_label:s}.fail.${{LSB_JOBID}}" fi + echo "Processed {sample_name:s} `date`" - echo "Processed ${{sample}} `date`" +}} +function main() {{ + echo "Start `date`" + cd ${{TMPDIR}} + echo "working from `pwd`" + echo "Setting up release:" + lsetup 'rcSetup -u' + lsetup 'rcsetup Base,2.3.50' + prepare + run echo "Cleaning up" rm -rf $TMPDIR/* - test -f "${{absolute_base_dir}}/${{relative_output_dir}}/{job_label:s}.${{LSB_JOBID}}.done" || abortJob - }} main diff --git a/python/batch_utils.py b/python/batch_utils.py index 9679511..e4dc769 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -15,6 +15,8 @@ batch/ |--- output/ `--- status/ +Each sample will produce a status file sample.[done,fail] and a root file. + TODO: to avoid having too many files in one place, think about having subdirectories under output (for example by group + merged) @@ -123,7 +125,7 @@ class JobManager(object): def submit_jobs(self): for job in self.jobs: cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, - base_dir=self.absolute_output_base_dir, job=job) + base_dir=self.absolute_base_dir, job=job) if self.verbose: print 'cmd: ',cmd if not self.dry_run: @@ -226,7 +228,7 @@ class LxbJobManager(JobManager): # not sure this is working # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file) +" -J %s " % job.short_batch_file - +" -o %s.oe " % (self.relative_output_directory+'/'+job.short_batch_file) + +" -o %s.oe " % (self.relative_log_directory+'/'+job.short_batch_file) +" %s" % os.path.join(base_dir, job.script_path) ) return cmd @@ -248,7 +250,7 @@ class At3JobManager(JobManager): def job_submission_command(self, queue=None, verbose=None, base_dir=None, job=None): cmd = (" qsub " +" -j oe " # join stdout and stderr - +" -o %s.oe " % (self.relative_output_directory+'/'+job.short_batch_file) + +" -o %s.oe " % (self.relative_log_directory+'/'+job.short_batch_file) +" -q %s " % queue +" %s" % os.path.join(base_dir, job.script_path) ) -- GitLab From 5597ebeaeeca8f9939cc49519f6a82b76af522a8 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 19 Oct 2016 11:27:34 +0200 Subject: [PATCH 18/35] add 'other_options' to templates This is where the sample-specific options will go --- data/hbsm/batch/templates/cern/one_lep_nom.sh | 1 + data/hbsm/batch/templates/ifae/one_lep_nom.sh | 1 + python/batch_utils.py | 1 + 3 files changed, 3 insertions(+) diff --git a/data/hbsm/batch/templates/cern/one_lep_nom.sh b/data/hbsm/batch/templates/cern/one_lep_nom.sh index 8ac1600..b0e30bc 100644 --- a/data/hbsm/batch/templates/cern/one_lep_nom.sh +++ b/data/hbsm/batch/templates/cern/one_lep_nom.sh @@ -76,6 +76,7 @@ function run() {{ --isData=false \ --computeWeightSys={compute_weight_sys:s} \ --onlyDumpSystHistograms=true \ + {other_options:s} \ >> ${{subtask_log_file}} 2>&1 || true echo "Completed 'run' step `date`" >> ${{subtask_log_file}} 2>&1 diff --git a/data/hbsm/batch/templates/ifae/one_lep_nom.sh 
b/data/hbsm/batch/templates/ifae/one_lep_nom.sh
index 23d440a..d234afb 100644
--- a/data/hbsm/batch/templates/ifae/one_lep_nom.sh
+++ b/data/hbsm/batch/templates/ifae/one_lep_nom.sh
@@ -73,6 +73,7 @@ function run() {{
         --isData=false \
         --computeWeightSys={compute_weight_sys:s} \
         --onlyDumpSystHistograms=true \
+        {other_options:s} \
         >> ${{subtask_log_file}} 2>&1 || true

     # These options are only for tests
diff --git a/python/batch_utils.py b/python/batch_utils.py
index e4dc769..af29c93 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -110,6 +110,7 @@ class JobManager(object):
                               'dsid' : sample.dsid,
                               'compute_weight_sys' : ('true' if is_nominal else 'false'),
                               'job_label' : job_label,
+                              'other_options' : '',
                               }
                 batch_filename = self.relative_input_directory+'/'+sample_name+'_'+variation.name+'.sh'
                 if self.debug:
-- 
GitLab


From 8b2a1662e6bf94b23cfb5fa4b222ccb07106a4df Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Wed, 19 Oct 2016 16:59:57 +0200
Subject: [PATCH 19/35] working version of hbsm_submit using all the tools

Details:
- JobManager: propagate other options to base class
- hbsm_submit.py : rewrite using JobManager
- add SampleCatalogue.prune_samples
- propagate syst_option to SampleCatalogue.add_systematic_variations
- systematics.Variation: fix error message
---
 python/batch_utils.py       |  23 +++-
 python/hbsm_submit.py       | 252 ++++++++++--------------------------
 python/sample_catalogues.py |  80 ++++++++----
 python/systematics.py       |   2 +-
 4 files changed, 137 insertions(+), 220 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index af29c93..22e836e 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -218,8 +218,8 @@ class JobManager(object):

 class LxbJobManager(JobManager):
     "Job manager for lxbatch queues at cern"
-    def __init__(self, jobset_label):
-        super(LxbJobManager, self).__init__(jobset_label)
+    def __init__(self, jobset_label, **kwargs):
+        super(LxbJobManager, self).__init__(jobset_label, **kwargs)
         self.queues = ['8nm', '1nh', '8nh', '1nd', '2nd', '1nw', '2nw', 'test'] # bqueues -u ${USER}
         self.template_path = 'VLQAnalysis/data/hbsm/batch/templates/cern/one_lep_nom.sh'
     def job_submission_command(self, queue=None, verbose=None, base_dir=None, job=None):
@@ -242,8 +242,8 @@ class LxbJobManager(JobManager):

 class At3JobManager(JobManager):
     "Job manager for at3 queues at pic"
-    def __init__(self, jobset):
-        super(LxbJobManager, self).__init__(jobset_label)
+    def __init__(self, jobset, **kwargs):
+        super(At3JobManager, self).__init__(jobset, **kwargs)
         self.queues = ['at3_short', 'at3', 'at3_8h', 'at3_xxl']
         self.template_path = 'VLQAnalysis/data/hbsm/batch/templates/ifae/one_lep_nom.sh'
     def create_job(self, sample, systematic, variation, template_path=None):
@@ -257,7 +257,15 @@ class At3JobManager(JobManager):
            )
         return cmd
-
+def guess_batch_platform():
+    out = utils.get_command_output('hostname --domain')
+    domain = out['stdout'].strip()
+    if domain=='cern.ch':
+        return 'lxbatch'
+    elif domain=='pic.es':
+        return 'at3'
+    else:
+        raise NotImplementedError("unknown domain '%s'; only pic.es and cern.ch are currently supported" % domain)
 #___________________________________________________________

 if __name__=='__main__':
@@ -273,9 +281,10 @@ if __name__=='__main__':
     sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos)
     # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir)

-    job_manager = LxbJobManager('test_2016-08-02')
+    job_manager = LxbJobManager('test_2016-10-19')
+    job_manager.queue = '8nh'
     job_manager.verbose = True
-    job_manager.dry_run = False
+    job_manager.dry_run = True # False
     for sample in sc_hbsm.samples:
         for systematic in sample.systematic_uncertainties:
             for variation in [v for v in systematic.variations if v.name=='nominal']:
diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py
index f8b4e56..3a4ea4e 100755
--- a/python/hbsm_submit.py
+++ b/python/hbsm_submit.py
@@ -5,14 +5,13 @@
 # davide.gerbaudo@gmail.com
 # Jun 2016

+import glob
 import optparse
 import os
-
-from samples import hbsm as hbsm_samples
-from samples import systematics as hbsm_systematics
-from samples.sample import base_dir_lxplus as base_dir_lxplus
-from samples.sample import base_dir_at3pnfs as base_dir_at3pnfs
+import batch_utils
+import sample_catalogues
+import systematics
 import utils

 description = """
@@ -20,9 +19,13 @@ Submit to batch the jobs to run VLQAnalysis/util/VLQAnalysis.cxx
 """

 usage = """
-%prog -l bkgonly_2016-05-25
-%prog -l test_2016-07-12 --lxbatch -v -syst nominal --sample-include bbH_m1000
+First time from a new area:
+%prog -l bkgonly_2016-05-25 --generate-groupfiles
+
+Then:
+%prog -l test_2016-10-19
+%prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --verbose
 (run with -h to get all options)

 This script should be called from the directory above VLQAnalysis and RootCoreBin.
@@ -42,32 +45,33 @@ The rules to go from the templates to the job-specific files are:
   Example: 'E_{T}{something}' -> 'E_{{T}}{{something}}'
   This can be done either in the template or in this script.

+TODO check job output
+TODO split large jobs
+TODO merge large jobs
+TODO check root output
+TODO resubmit broken jobs
+TODO merge split outputs
+TODO just-print-exe
 """

 def main():
-    default_at3_queue='at3'
-    default_lx_queue='8nh'
-    default_at3_batch_template='VLQAnalysis/data/hbsm/batch/templates/ifae/one_lep_nom.sh'
-    default_lx_batch_template='VLQAnalysis/data/hbsm/batch/templates/cern/one_lep_nom.sh'
-    lxbatch_queues = ['8nm', '1nh', '8nh', '1nd', '2nd', '1nw', '2nw', 'test'] # bqueues -u ${USER}
-    at3_queues = ['at3_short', 'at3', 'at3_8h', 'at3_xxl']
-
     parser = optparse.OptionParser(description=description, usage=usage)
     parser.add_option('-l', '--label', default=None, help='job label; used to make input/output subdirectories')
-    parser.add_option('-q', '--queue', help='queue, defaults %s on at3, %s on lxbatch'%(default_at3_queue, default_lx_queue))
+    parser.add_option('-q', '--queue', default=None)
     parser.add_option('-s', '--syst', default='all', help="variations to process (default %default). Give a comma-sep list or say 'weight', 'object'")
-    parser.add_option('--syst-include', default='.*', help='include only the systematics matching the regexp')
-    parser.add_option('--syst-exclude', default=None, help='exclude the systematics matching the regexp')
+    # parser.add_option('--syst-include', default='.*', help='include only the systematics matching the regexp')
+    # parser.add_option('--syst-exclude', default=None, help='exclude the systematics matching the regexp')
     parser.add_option('--list-systematics', action='store_true', help='list the systematics available in the catalogue')
-    parser.add_option('--sample-include', default='.*', help='include only the samples matching the regexp (short name)')
-    parser.add_option('--sample-exclude', default=None, help='exclude the samples matching the regexp (short name)')
+    parser.add_option('--sample-include', default='.*', help='include only the samples matching the regexp (short name if available, else full_name)')
+    parser.add_option('--sample-exclude', default=None, help='exclude the samples matching the regexp (short name if available, else full_name)')
     parser.add_option('-S', '--submit', action='store_true', help='actually submit the jobs')
-    parser.add_option('--lxbatch', action='store_true', help='lxbatch rather than at3')
-    parser.add_option('--batch-template', help='batch template, default %s or %s'%(default_at3_batch_template, default_lx_batch_template))
+    parser.add_option('--batch-template', help='batch template; otherwise use default one from JobManager')
     parser.add_option('--tarfile', default=None, help='the tar file will contain the code')
-    parser.add_option('--overwrite-tar', action=store_true, help='re-create tar even when it exists')
-    parser.add_option('--output-dir', default='batch/output/', help='output base directory (will contain job subdirectories), default %default')
+    parser.add_option('--overwrite-tar', action='store_true', help='re-create tar even when it exists')
+    parser.add_option('--generate-groupfiles', action='store_true', help='generate group files')
+    parser.add_option('--generate-filelists', action='store_true', help='generate input file lists')
+    parser.add_option('--groupfile', default=None, help='if you just want to run on one group file, eg. 
data/groups/hbsm/hbsm.txt') parser.add_option('-v', '--verbose', action='store_true', help='print what it is doing') parser.add_option('-d', '--debug', action='store_true', help='print even more debugging information') @@ -77,170 +81,48 @@ def main(): if opts.label and opts.label[0].isdigit(): parser.error('Label cannot start with a digit') if opts.list_systematics: - hbsm_systematics.SystematicCatalogue().print_all() + systematics.SystematicCatalogue().print_all() return - - opts.queue = (opts.queue if opts.queue else - default_lx_queue if opts.lxbatch else - default_at3_queue) - template_batch = (opts.batch_template if opts.batch_template else - default_at3_batch_template if opts.queue in at3_queues else - default_lx_batch_template if opts.queue in lxbatch_queues else - None) - if not template_batch: - parser.error('Invalid batch configuration, check queue or template') - + batch_platform = batch_utils.guess_batch_platform() + job_manager = (batch_utils.At3JobManager if batch_platform=='at3' else + batch_utils.LxbJobManager) + job_manager = job_manager(jobset_label=opts.label, verbose=opts.verbose, debug=opts.debug) + job_manager.dry_run = not opts.submit + if opts.queue: job_manager.queue = opts.queue + if opts.batch_template: job_manager.template_path = opts.batch_template if opts.verbose: utils.print_running_conditions(parser, opts) - print "Preparing jobs using the following template:" - print "batch: %s" % template_batch - - lxbatch = opts.queue in lxbatch_queues - at3 = opts.queue in at3_queues - samples_to_process = hbsm_samples.build_samples_list(base_dir_lxplus if lxbatch else base_dir_at3pnfs) - samples_to_process = (utils.filter_with_regexp(samples_to_process, opts.sample_include, func=lambda x: x.short_name) - if opts.sample_include else samples_to_process) - samples_to_process = (utils.exclude_with_regexp(samples_to_process, opts.sample_exclude, func=lambda x: x.short_name) - if opts.sample_exclude else samples_to_process) - batch_files = prepare_batch_files(opts=opts, template_filename=template_batch, samples=samples_to_process) - rel_out_dir = utils.mkdir_if_needed(relative_output_directory(opts)) - for batch_file in batch_files: - submit_job(batch_file, opts, lxbatch=lxbatch, at3=at3) - if not opts.submit: - msg = ("This was a dry run, no jobs submitted" if not opts.verbose and not opts.debug else - "This was a dry run. Check your files in \n%s\n and then re-run with --submit" - % (relative_batch_directory(opts))) - print msg - - -def relative_batch_directory(opts=None): - "where the job script files will be generated" - return 'batch/input/'+opts.label - -def relative_output_directory(opts=None): - "where the job script files will be generated" - return 'batch/output/'+opts.label - -def base_directory(): - "The base directory is the one above TtHFitter and tthf-trex-utils" - python_dir = os.path.dirname(os.path.abspath(__file__)) - up_two = (os.pardir, os.pardir) - return os.path.normpath(os.path.abspath(os.path.join(python_dir, *up_two))) - -def vlq_directory(): - return base_directory()+'/VLQAnalysis' - -def vlq_job_label(sample_name=None, variation_name=None): - "The label used to distinguish one job from another." 
- job_label = sample_name+'_'+variation_name - return job_label - -def prepare_batch_files(opts=None, template_filename=None, samples=[]): - batch_dir = utils.mkdir_if_needed(relative_batch_directory(opts)) - verbose = opts.verbose - absolute_output_base_dir = base_directory() - tar_file = opts.tarfile if opts.tarfile else "%s/%s/packages.tgz" % (base_directory(), batch_dir) - tar_file = os.path.abspath(tar_file) - if not os.path.exists(tar_file) or opts.overwrite_tar: - prepare_tar_file(tar_file_name=tar_file, verbose=opts.verbose) - elif verbose: - print "Using existing tar file: %s"%tar_file - - batch_filenames = [] - if opts.debug: - print "filling template %s" % (template_filename) - template_contents = open(template_filename).read() - for sample in samples: - sample_name = sample.short_name - # sample_name = 'hbsm' - print('fixme hack around sample_name') - if opts.syst=='all': - sample.use_all_uncertainties() - elif opts.syst=='object': - sample.use_object_uncertainties() - elif opts.syst=='weight': - sample.use_weight_uncertainties() - else: - sample.use_nominal_uncertainty() - systematics = sample.systematic_uncertainties - systematics = (utils.filter_with_regexp(systematics, opts.syst_include, func=lambda x: x.name) if opts.syst_include else - systematics) - systematics = (utils.exclude_with_regexp(systematics, opts.syst_exclude, func=lambda x: x.name) if opts.syst_exclude else - systematics) - # should we also filter with the same regex on variation.input_tree? it might be a useful feature - for systematic in systematics: - is_nominal = systematic.is_nominal - for variation in systematic.variations: - job_label = vlq_job_label(sample_name, variation.name) - parameters = {'sample_name' : sample_name, - 'tar_file' : tar_file, - 'relative_output_dir' : relative_output_directory(opts), - 'absolute_output_base_dir' : absolute_output_base_dir, - 'filelist_name' : sample.filelist_file, - 'input_tree' : variation.input_tree, - 'output_file' : (sample_name+'.root' if is_nominal else - "%s_%s.root" % (sample_name, variation.name)), - 'dsid' : sample.dsid, - 'compute_weight_sys' : ('true' if is_nominal else 'false'), - 'job_label' : job_label, - } - batch_filename = batch_dir+'/'+sample_name+'_'+variation.name+'.sh' - if opts.debug: - print "generating %s" % (batch_filename) - batch_file = open(batch_filename, 'w') - batch_file.write(template_contents.format(**parameters)) - batch_file.close() - os.chmod(batch_filename, 0755) - if verbose: - print "created batch file %s" % batch_filename - batch_filenames.append(batch_filename) - return batch_filenames - -def submit_job(batch_file=None, opts=None, lxbatch=False, at3=False): - queue = opts.queue - verbose = opts.verbose - base_dir = base_directory() - short_batch_file = os.path.splitext(os.path.basename(batch_file))[0] # w/out ext, =sample name - - cmd = '' - if opts.lxbatch: - cmd = (" bsub " - +" -L /bin/bash " # reset shell - +" -q %s " % queue - # not sure this is working - # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file) - +" -J %s " % short_batch_file - +" -o %s.oe " % (relative_output_directory(opts=opts)+'/'+short_batch_file) - +" %s" % os.path.join(base_dir, batch_file) - ) - else: - cmd = (" qsub " - +" -j oe " # join stdout and stderr - +" -o %s/%s/%s.oe" % (base_dir, relative_output_directory(opts), short_batch_file) - +" -q %s " % queue - +" %s" % batch_file - ) - if verbose: - print cmd - if opts.submit: - out = utils.get_command_output(cmd) - if verbose: - print out['stdout'] - print 
out['stderr']

-def prepare_tar_file(tar_file_name=None, verbose=None):
-    "create tar; behaves as GNU 'tar', i.e. by default it overwrites existing files"
-    cmd = "tar czf %s " % tar_file_name
-    cmd += " BtaggingTRFandRW IFAEReweightingTools IFAETopFramework VLQAnalysis"
-    cmd += " --exclude='.svn' --exclude='.git' --exclude='*.o' --exclude='*.so'"
-    out = get_command_output()
-    if out['returncode']!=0:
-        print "Failed to create tar file %s" % tar_file_name
-        print out['stderr']
-        print out['stdout']
-    elif verbose:
-        print out['stderr']
-        print out['stdout']
-        print "created tar file %s" % tar_file_name
+    print "Preparing jobs using the following template: %s" % job_manager.template_path
+
+    sample_catalogue = sample_catalogues.HbsmSampleCatalogue() # TODO or VlqSampleCatalogue
+
+    if opts.generate_groupfiles:
+        # TODO prompt: ask about sample list from new production
+        sample_catalogue.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt')
+        sample_catalogue.categorise_samples(sample_catalogue.samples)
+        sample_catalogue.write_group_files()
+        return
+    sample_catalogue.add_samples_from_group_files(glob.glob(opts.groupfile) if opts.groupfile else
+                                                  glob.glob(sample_catalogue.groupfiles_directory+'/*.txt'))
+    sample_catalogue.prune_samples(regex_include=opts.sample_include,
+                                   regex_exclude=opts.sample_exclude)
+    samples_to_process = sample_catalogue.add_systematic_variations(samples=sample_catalogue.samples,
+                                                                    verbose=opts.verbose,
+                                                                    syst_option=opts.syst)
+
+    if opts.generate_filelists:
+        pass
+    input_interface = sample_catalogues.RucioEosCernInterface()
+    # TODO ask where we should read the files from
+    # input_interface = sample_catalogues.RucioPnfsIfaeInterface()
+    # input_interface = sample_catalogues.At3ScratchDiskInterface()
+    sample_catalogue.add_filelists(samples=samples_to_process, input_interface=input_interface)
+
+    for sample in samples_to_process:
+        for systematic in sample.systematic_uncertainties:
+            for variation in [v for v in systematic.variations if v.name=='nominal']:
+                job_manager.create_job(sample, systematic, variation)
+    job_manager.submit_jobs()
+

 if __name__=='__main__':
     main()
diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 318dc40..7686730 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -219,7 +219,11 @@ class SampleCatalogue(object):
         for sample in samples:
             for uncertainty in sample.systematic_uncertainties:
                 for variation in uncertainty.variations:
-                    variation.filelist = input_interface.generate_filelist(sample.full_name)
+                    filelist = input_interface.generate_filelist(sample.full_name)
+                    variation.filelist = filelist
+                    if self.verbose:
+                        print "%s %s : added filelist %s" % (variation.name, sample.full_name, filelist)
+
                     # note to self: here sample knows about the
                     # container name, and variation knows about the
                     # treename. It assumes that the object variation
@@ -339,6 +343,16 @@ class SampleCatalogue(object):
             of.write('done \n')
             of.write('cd - \n')
         print "To generate the file lists, open a new shell with rucio, then execute 'source %s'" % script_filename
+    def prune_samples(self, regex_include=None, regex_exclude=None):
+        """filter samples with two input regex that are applied to the
+        short name (if available) or to the full_name"""
+        self.samples = (utils.filter_with_regexp(self.samples, regex_include,
+                                                 func=lambda x: x.short_name if x.short_name else x.full_name)
+                        if regex_include else self.samples)
+        self.samples = (utils.exclude_with_regexp(self.samples, regex_exclude,
+                                                  func=lambda x: x.short_name if x.short_name else x.full_name)
+                        if regex_exclude else self.samples)
+

 #___________________________________________________________

@@ -475,10 +489,14 @@ class HbsmSampleCatalogue(SampleCatalogue):
                 None)

     @classmethod
-    def add_systematic_variations(cls, samples=None, verbose=False):
+    def add_systematic_variations(cls, samples=None, verbose=False, syst_option=False):
        """Here we might need to add/drop samples, so we will just
        re-build the list dealing with the groups one at a time
        """
+        weight_only = syst_option and syst_option=='weight'
+        object_only = syst_option and syst_option=='object'
+        if syst_option and syst_option not in ['weight', 'object', 'all']:
+            raise NotImplementedError("for now can only accept either weight|object|all, not '%s'" % syst_option)
        updated_samples = []
        samples_per_group = collections.defaultdict(list)
        for sample in samples:
@@ -493,16 +511,17 @@ class HbsmSampleCatalogue(SampleCatalogue):
            else:
                if verbose:
                    print 'adding other systematics'
-                updated_samples += cls.add_generic_systematics(samples)
+                updated_samples += cls.add_generic_systematics(samples, weight_only=weight_only, object_only=object_only)
        # do we need to do anything special for the signals?
        return updated_samples

     @staticmethod
-    def add_ttbar_systematics(ttbar_samples):
+    def add_ttbar_systematics(ttbar_samples, weight_only=False, object_only=False):
        """Take a list of samples and provide a new list containing
        samples with syst uncertainties (and additional samples if
        needed when splitting in hf).
        """
+        print "add_ttbar_systematics: weight_only, object_only not implemented yet"
        updated_samples = []
        hf_splitted = True # should it be configurable? in this case we need to process the samples n times
        use_ht_slices = True # should it be configurable? 
in this case we need to process more samples @@ -527,10 +546,17 @@ class HbsmSampleCatalogue(SampleCatalogue): return updated_samples @staticmethod - def add_generic_systematics(samples): + def add_generic_systematics(samples, weight_only=False, object_only=False): "Toggle on the weight and object systematic variations" - for s in samples: - s.use_all_uncertainties() + if weight_only: + for s in samples: + s.use_weight_uncertainties() + elif object_only: + for s in samples: + s.use_object_uncertainties() + else: + for s in samples: + s.use_all_uncertainties() return samples #___________________________________________________________ @@ -746,7 +772,7 @@ if __name__=='__main__': # -- tested: ok (go from one list to group files and back) sc = HbsmSampleCatalogue() - sc = VlqSampleCatalogue() + # sc = VlqSampleCatalogue() sc.verbose = True sc.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt') sc.categorise_samples(sc.samples) # only for specialised catalogues @@ -766,22 +792,22 @@ if __name__=='__main__': # for s in ttbar_samples: # print s.short_name,' ',s.full_name -# later # -- tested: ok for both eos and disk -# later sc_hbsm = HbsmSampleCatalogue() -# later # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) -# later sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/hbsm_test/hbsm.txt')) # test just on -# later sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) -# later input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', -# later base_input_dir='/tmp/gerbaudo/rucio') -# later input_from_eos = RucioEosCernInterface() -# later def print_filelists(samples): -# later for sample in samples: -# later for systematic in sample.systematic_uncertainties: -# later for variation in [v for v in systematic.variations if v.name=='nominal']: -# later print "%s %s : %s" % (variation.name, sample.full_name, variation.filelist) -# later try: -# later print_filelists(sc_hbsm.samples) -# later except IOError: -# later print "Missing filelists, generating them" -# later sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) -# later print_filelists(sc_hbsm.samples) + # -- tested: ok for both eos and disk + sc_hbsm = HbsmSampleCatalogue() + # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) + sc_hbsm.add_samples_from_group_files(glob.glob(sc_hbsm.groupfiles_directory+'/hbsm.txt')) + sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) + input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', + base_input_dir='/tmp/gerbaudo/rucio') + input_from_eos = RucioEosCernInterface() + def print_filelists(samples): + for sample in samples: + for systematic in sample.systematic_uncertainties: + for variation in [v for v in systematic.variations if v.name=='nominal']: + print "%s %s : %s" % (variation.name, sample.full_name, variation.filelist) + try: + print_filelists(sc_hbsm.samples) + except IOError: + print "Missing filelists, generating them" + sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) + print_filelists(sc_hbsm.samples) diff --git a/python/systematics.py b/python/systematics.py index 0e806ca..3e91689 100644 --- a/python/systematics.py +++ b/python/systematics.py @@ -25,7 +25,7 @@ class Variation(object): @property def filelist(self): if not self._filelist: - raise IOError("missing input data for '%s'\nPlease call SampleCatalogue.add_input_data") + raise 
IOError("missing input data for '%s'\nPlease call SampleCatalogue.add_filelists" % self.name) else: return self._filelist @filelist.setter -- GitLab From e5f6547a4f25938575416e019c10d910e06bebd1 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 19 Oct 2016 17:33:34 +0200 Subject: [PATCH 20/35] try getting the release from the current environment and failing. Still better to have the release version hardcoded only in one place, rather than in N templates. --- data/hbsm/batch/templates/cern/one_lep_nom.sh | 2 +- data/hbsm/batch/templates/ifae/one_lep_nom.sh | 2 +- python/batch_utils.py | 13 +++++++++++++ python/utils.py | 5 +++-- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/data/hbsm/batch/templates/cern/one_lep_nom.sh b/data/hbsm/batch/templates/cern/one_lep_nom.sh index b0e30bc..9cd8f84 100644 --- a/data/hbsm/batch/templates/cern/one_lep_nom.sh +++ b/data/hbsm/batch/templates/cern/one_lep_nom.sh @@ -110,7 +110,7 @@ function main() {{ cd ${{tmp_dir}} echo "working from `pwd`" lsetup 'rcSetup -u' - lsetup 'rcsetup Base,2.4.14' + lsetup 'rcsetup {rc_release_version:s}' prepare run # echo "Cleaning up" # done automatically on lxbatch diff --git a/data/hbsm/batch/templates/ifae/one_lep_nom.sh b/data/hbsm/batch/templates/ifae/one_lep_nom.sh index d234afb..c3bdc71 100644 --- a/data/hbsm/batch/templates/ifae/one_lep_nom.sh +++ b/data/hbsm/batch/templates/ifae/one_lep_nom.sh @@ -101,7 +101,7 @@ function main() {{ echo "working from `pwd`" echo "Setting up release:" lsetup 'rcSetup -u' - lsetup 'rcsetup Base,2.3.50' + lsetup 'rcsetup {rc_release_version:s}' prepare run echo "Cleaning up" diff --git a/python/batch_utils.py b/python/batch_utils.py index 22e836e..983d71c 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -78,6 +78,7 @@ class JobManager(object): self.verbose = verbose self.debug = debug self.overwrite_tar = False + self.rc_release_version = guess_rc_release_version() self.create_directories() def create_job(self, sample, systematic, variation, template_path=None): @@ -111,6 +112,7 @@ class JobManager(object): 'compute_weight_sys' : ('true' if is_nominal else 'false'), 'job_label' : job_label, 'other_options' : '', + 'rc_release_version' : self.rc_release_version } batch_filename = self.relative_input_directory+'/'+sample_name+'_'+variation.name+'.sh' if self.debug: @@ -266,6 +268,17 @@ def guess_batch_platform(): return 'at3' else: raise NotImplementedError("unknown domain '%s'; only pic.es and cern.ch are currently supported" % domain) + +def guess_rc_release_version(): + # out = utils.get_command_output("lsetup 'rcSetup --printMyRelease'") + # out = utils.get_command_output("rcSetup --printMyRelease", with_current_environment=True) + # print 'release: out >>>>>>>>>>> ',out['stdout'].strip() + # print 'release: err >>>>>>>>>>> ',out['stderr'].strip() + # rc_release_version = out['stdout'].strip() + # TODO the solution above does not work; for now hardcode it here (at least it is in one single place) + rc_release_version = 'Base 2.4.14' + return rc_release_version + #___________________________________________________________ if __name__=='__main__': diff --git a/python/utils.py b/python/utils.py index 62fdfe9..20b6d2f 100644 --- a/python/utils.py +++ b/python/utils.py @@ -16,9 +16,10 @@ import sys import subprocess import unittest -def get_command_output(command): +def get_command_output(command, with_current_environment=False): "lifted from supy (https://github.com/elaird/supy/blob/master/utils/io.py)" - p = 
subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + env = None if not with_current_environment else os.environ.copy() + p = subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, stderr = subprocess.PIPE, env=env) stdout,stderr = p.communicate() return {"stdout":stdout, "stderr":stderr, "returncode":p.returncode} -- GitLab From b94edffd7409d7e4aa74b9cf86b98700d375fe73 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 19 Oct 2016 18:03:05 +0200 Subject: [PATCH 21/35] overwrite-tar, overwrite-scripts options --- python/batch_utils.py | 24 +++++++++++++++--------- python/hbsm_submit.py | 3 +++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/python/batch_utils.py b/python/batch_utils.py index 983d71c..9a78b17 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -78,6 +78,7 @@ class JobManager(object): self.verbose = verbose self.debug = debug self.overwrite_tar = False + self.overwrite_batch_scripts = False self.rc_release_version = guess_rc_release_version() self.create_directories() @@ -88,7 +89,8 @@ class JobManager(object): def generic_create_job(self, sample, systematic, variation, template_path=None): """create the script and append Job to self.jobs template_path should be used only for special samples using non-default - template + template; for all other cases go through the + implementation-specific 'create_job'. """ template_path = template_path if template_path else self.template_path template_contents = (self.template_contents if template_path==self.template_path # use cache if default @@ -115,14 +117,18 @@ class JobManager(object): 'rc_release_version' : self.rc_release_version } batch_filename = self.relative_input_directory+'/'+sample_name+'_'+variation.name+'.sh' - if self.debug: - print "generating %s" % (batch_filename) - batch_file = open(batch_filename, 'w') - batch_file.write(template_contents.format(**parameters)) - batch_file.close() - os.chmod(batch_filename, 0755) - if self.verbose: - print "created batch file %s" % batch_filename + if os.path.exists(batch_filename) and not self.overwrite_batch_scripts: + if self.debug: + print "using existing %s" % batch_filename + else: + if self.debug: + print "generating %s" % (batch_filename) + batch_file = open(batch_filename, 'w') + batch_file.write(template_contents.format(**parameters)) + batch_file.close() + os.chmod(batch_filename, 0755) + if self.verbose: + print "created batch file %s" % batch_filename self.jobs.append(Job(batch_filename)) def submit_jobs(self): diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py index 3a4ea4e..0941499 100755 --- a/python/hbsm_submit.py +++ b/python/hbsm_submit.py @@ -69,6 +69,7 @@ def main(): parser.add_option('--batch-template', help='batch template; otherwise use default one from JobManager') parser.add_option('--tarfile', default=None, help='the tar file will contain the code') parser.add_option('--overwrite-tar', action='store_true', help='re-create tar even when it exists') + parser.add_option('--overwrite-scripts', action='store_true', help='re-create the batch scripts even when they exist') parser.add_option('--generate-groupfiles', action='store_true', help='generate group files') parser.add_option('--generate-filelists', action='store_true', help='generate input file lists') parser.add_option('--groupfile', default=None, help='if you just want to run on one group file, eg. 
data/groups/hbsm/hbsm.txt')
@@ -90,6 +91,8 @@ def main():
     job_manager.dry_run = not opts.submit
     if opts.queue: job_manager.queue = opts.queue
     if opts.batch_template: job_manager.template_path = opts.batch_template
+    if opts.overwrite_tar: job_manager.overwrite_tar = True
+    if opts.overwrite_scripts: job_manager.overwrite_batch_scripts = True
     if opts.verbose:
         utils.print_running_conditions(parser, opts)
         print "Preparing jobs using the following template: %s" % job_manager.template_path
-- 
GitLab


From 97f650e44077cd05250b6b0b3dd81b2cff2832c8 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Wed, 19 Oct 2016 18:34:27 +0200
Subject: [PATCH 22/35] add check-outputs

---
 python/batch_utils.py | 31 +++++++++++++++++++++++++++++--
 python/hbsm_submit.py |  9 ++++++++-
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index 9a78b17..8471fc0 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -52,14 +52,27 @@ def vlq_directory():
 #___________________________________________________________

 class Job(object):
-    "TODO add description"
+    """A job with a script file and an expected output.
+    """
     def __init__(self, script_path):
         self.script_path = script_path
+        self._expected_output_file = None
     @property
     def short_batch_file(self):
         "use to name the status and log files"
         return utils.filename_without_extension(self.script_path) # w/out ext, =sample name
-
+    @property
+    def expected_output_file(self):
+        if not self._expected_output_file:
+            cmd = "grep outputFile %s" % self.script_path
+            out = utils.get_command_output(cmd)
+            tokens = out['stdout'].strip().replace('=', ' ').split()
+            file_path = next((t for t in tokens if '.root' in t), '')
+            if file_path:
+                self._expected_output_file = file_path.strip()
+            else:
+                raise RuntimeError("cannot extract output file\nTry\n%s" % cmd)
+        return self._expected_output_file

 #___________________________________________________________

@@ -143,7 +156,21 @@ class JobManager(object):
             with open(status_path, 'w') as status_file:
                 status_file.write('stdout:\n'+out['stdout']+
                                   'stderr:\n'+ out['stderr'])
+    def check_outputs(self):
+        counter_any = 0
+        counter_done = 0
+        counter_not_done = 0
+        for job in self.jobs:
+            if os.path.exists(job.expected_output_file):
+                job.ready = True
+                counter_done += 1
+            else:
+                counter_not_done += 1
+            counter_any += 1
+        print "Checked %d jobs: %d done, %d missing" % (counter_any, counter_done, counter_not_done)
+
     def check_job(self, sample):
+        # TODO this should probably become obsolete (there's already bjobs)
         raise NotImplementedError("todo")
     @property
     def relative_input_directory(self):
diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py
index 0941499..9ebc489 100755
--- a/python/hbsm_submit.py
+++ b/python/hbsm_submit.py
@@ -26,6 +26,9 @@ Then:
 %prog -l test_2016-10-19
 %prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --verbose
+
+%prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --check-outputs
+
 (run with -h to get all options)

 This script should be called from the directory above VLQAnalysis and RootCoreBin.
@@ -62,6 +65,7 @@ def main():
     parser.add_option('-s', '--syst', default='all', help="variations to process (default %default). 
Give a comma-sep list or say 'weight', 'object'") # parser.add_option('--syst-include', default='.*', help='include only the systematics matching the regexp') # parser.add_option('--syst-exclude', default=None, help='exclude the systematics matching the regexp') + parser.add_option('--check-outputs', action='store_true', help='check the root output files') # TODO check and resubmit parser.add_option('--list-systematics', action='store_true', help='list the systematics available in the catalogue') parser.add_option('--sample-include', default='.*', help='include only the samples matching the regexp (short name if available, else full_name)') parser.add_option('--sample-exclude', default=None, help='include only the samples matching the regexp (short name if available, else full_name)') @@ -125,7 +129,10 @@ def main(): for systematic in sample.systematic_uncertainties: for variation in [v for v in systematic.variations if v.name=='nominal']: job_manager.create_job(sample, systematic, variation) - job_manager.submit_jobs() + if opts.check_outputs: + job_manager.check_outputs() + else: + job_manager.submit_jobs() if __name__=='__main__': main() -- GitLab From 88b386aad73f346181540d1575d15609a33f881f Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Mon, 24 Oct 2016 16:26:17 +0200 Subject: [PATCH 23/35] make clear that default is 'nominal' Also fix bug (nominal was added only when calling add_systematics) --- python/hbsm_submit.py | 11 ++++++----- python/sample_catalogues.py | 8 ++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py index 9ebc489..27e8856 100755 --- a/python/hbsm_submit.py +++ b/python/hbsm_submit.py @@ -62,11 +62,11 @@ def main(): parser = optparse.OptionParser(description=description, usage=usage) parser.add_option('-l', '--label', default=None, help='job label; used to make input/output subdirectories') parser.add_option('-q', '--queue', default=None) - parser.add_option('-s', '--syst', default='all', help="variations to process (default %default). 
Give a comma-sep list or say 'weight', 'object'")
+    parser.add_option('-s', '--syst', default='nominal', help="variations to process ('weight', 'object', default %default).")
+    parser.add_option('--list-systematics', action='store_true', help='list the systematics available in the catalogue')
     # parser.add_option('--syst-include', default='.*', help='include only the systematics matching the regexp')
     # parser.add_option('--syst-exclude', default=None, help='exclude the systematics matching the regexp')
     parser.add_option('--check-outputs', action='store_true', help='check the root output files') # TODO check and resubmit
-    parser.add_option('--list-systematics', action='store_true', help='list the systematics available in the catalogue')
     parser.add_option('--sample-include', default='.*', help='include only the samples matching the regexp (short name if available, else full_name)')
     parser.add_option('--sample-exclude', default=None, help='exclude the samples matching the regexp (short name if available, else full_name)')
     parser.add_option('-S', '--submit', action='store_true', help='actually submit the jobs')
@@ -113,9 +113,10 @@ def main():
                                                   glob.glob(sample_catalogue.groupfiles_directory+'/*.txt'))
     sample_catalogue.prune_samples(regex_include=opts.sample_include,
                                    regex_exclude=opts.sample_exclude)
-    samples_to_process = sample_catalogue.add_systematic_variations(samples=sample_catalogue.samples,
-                                                                    verbose=opts.verbose,
-                                                                    syst_option=opts.syst)
+    samples_to_process = (sample_catalogue.samples if opts.syst=='nominal' else
+                          sample_catalogue.add_systematic_variations(samples=sample_catalogue.samples,
+                                                                     verbose=opts.verbose,
+                                                                     syst_option=opts.syst))

     if opts.generate_filelists:
         pass
diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 7686730..b1cbeb0 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -87,7 +87,7 @@ class Sample(object):
         self._group = group
         self.job_options = job_options
         self.ds_input = ds_input
-        self.systematic_uncertainties = []
+        self.systematic_uncertainties = [catalogue.nominal]
         if not full_name:
             raise ValueError("Sample must have full_name")

@@ -511,8 +511,12 @@ class HbsmSampleCatalogue(SampleCatalogue):
            else:
                if verbose:
                    print 'adding other systematics'
-                updated_samples += cls.add_generic_systematics(samples, weight_only=weight_only, object_only=object_only)
+                updated_samples += cls.add_generic_systematics(samples,
+                                                               weight_only=weight_only,
+                                                               object_only=object_only)
        # do we need to do anything special for the signals?
+        # TODO filter systematics with comma-sep list from syst_option
+        # TODO filter systematics with regex
        return updated_samples

     @staticmethod
-- 
GitLab


From 16155b14fe1a6db5e3e19d89e8bfa04cd130fda4 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Tue, 25 Oct 2016 18:26:37 +0200
Subject: [PATCH 24/35] implement JobSet and merge script functionality

It works fine in my tests. For now hardcoded with a min of 10 input
files.
Also add the option to check the outputs (however this re-generates
the single-sample job scripts).
Todo next: recreate the Job and JobSet objects in memory from the
script files.
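In short, the merging works like this (a simplified standalone sketch
of JobManager.merge_jobs below; the Job objects expose
number_input_files, the count of entries in their filelist):

# Simplified sketch of the greedy grouping in merge_jobs: accumulate
# jobs until their summed input-file count exceeds the threshold, then
# close the batch; each batch then becomes one merged JobSet script.
def group_jobs_by_input_files(jobs, min_n_input_files=10):
    batches, current_batch, n_input_files = [], [], 0
    for job in jobs:
        current_batch.append(job)
        n_input_files += job.number_input_files
        if n_input_files > min_n_input_files:
            batches.append(current_batch)
            current_batch, n_input_files = [], 0
    if current_batch:
        batches.append(current_batch)  # keep the trailing partial batch
    return batches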
---
 python/batch_utils.py | 249 +++++++++++++++++++++++++++++++++++-------
 python/hbsm_submit.py |   2 +
 2 files changed, 214 insertions(+), 37 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index 8471fc0..0841d03 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -3,8 +3,9 @@ This module provides utilities to interact with batch systems (lxplus or at3)

 Overall design:
-JobManager creates Jobs
-Jobs can be generated/submitted/checked/resubmitted
+JobManager creates Jobs and JobSets
+Jobs can be generated/submitted/checked/resubmitted
+JobSets can be generated/submitted/checked/resubmitted

 Each group of jobs has a label; all relevant files go in the
 directories below:
@@ -54,25 +55,164 @@ def vlq_directory():
 class Job(object):
     """A job with a script file and an expected output.
     """
+    keyword_output_file = 'outputFile='
+    keyword_run_function = 'function run() {'
+    keyword_main_function = 'function main() {'
     def __init__(self, script_path):
         self.script_path = script_path
         self._expected_output_file = None
+        self._number_input_files = None
     @property
     def short_batch_file(self):
         "use to name the status and log files"
         return utils.filename_without_extension(self.script_path) # w/out ext, =sample name
+    @classmethod
+    def parse_expected_output_file(cls, file_path):
+        cmd = "grep %s %s" % (Job.keyword_output_file, file_path)
+        out = utils.get_command_output(cmd)
+        tokens = out['stdout'].strip().replace('=', ' ').split()
+        output_file = next((t for t in tokens if '.root' in t), '')
+        output_file = output_file.strip()
+        return output_file
     @property
     def expected_output_file(self):
         if not self._expected_output_file:
-            cmd = "grep outputFile %s" % self.script_path
+            cmd = 'grep "%s" %s' % (Job.keyword_output_file, self.script_path)
             out = utils.get_command_output(cmd)
             tokens = out['stdout'].strip().replace('=', ' ').split()
-            file_path = next((t for t in tokens if '.root' in t), '')
-            if file_path:
-                self._expected_output_file = file_path.strip()
-            else:
-                raise RuntimeError("cannot extract output file\nTry\n%s" % cmd)
+            self._expected_output_file = self.parse_expected_output_file(file_path=self.script_path)
+            if not self._expected_output_file:
+                raise RuntimeError("cannot extract output file from %s" % self.script_path)
         return self._expected_output_file
+    @property
+    def expected_output_files(self):
+        "Just to provide the same interface as JobSet"
+        return [self.expected_output_file]
+    @property
+    def number_input_files(self):
+        if not self._number_input_files:
+            cmd = 'grep "inputFile=" '+self.script_path
+            out = utils.get_command_output(cmd)
+            filelist_path = next((t for t in out['stdout'].split() if 'inputFile' in t), None) # find inputFile keyword
+            filelist_path = filelist_path.split('=')[1].strip() # take what follows =
+            filelist_path = filelist_path.split()[0].strip() # and drop any subsequent words
+            self._number_input_files = sum(1 for l in open(filelist_path).readlines() if not l.strip().startswith('#'))
+        return self._number_input_files
+
+#___________________________________________________________
+
+class JobSet(object):
+    """A job with a script file and multiple expected outputs.
+
+    A jobset can be built either by merging several jobs or from a merged script.
+    """
+    def __init__(self, script_path=None, jobs=[], verbose=False):
+        self.script_path = None
+        self._expected_output_files = []
+        if not script_path:
+            raise NotImplementedError("JobSet requires a script path")
+        from_merged_script = os.path.exists(script_path) and not jobs
+        from_jobs_to_merge = jobs and len(jobs)>0
+        if from_merged_script==from_jobs_to_merge:
+            raise NotImplementedError("Invalid arguments: script_path %s, %d jobs"%(script_path,
+                                                                                    len(jobs)))
+        if from_merged_script:
+            self.script_path = script_path
+            self._expected_output_files = JobSet.parse_expected_output_files(script_path)
+        else:
+            pre_merge_script_paths = [j.script_path for j in jobs]
+            self.script_path = JobSet.merge_scripts(orig_job_script_paths=pre_merge_script_paths, dest_script_path=script_path)
+            JobSet.delete_pre_merge_scripts(file_paths=pre_merge_script_paths)
+            self._expected_output_files = JobSet.parse_expected_output_files(script_path)
+            if verbose:
+                print "merged into\n > %s\nfrom%s" % (script_path, '\n< '.join(['']+pre_merge_script_paths))
+
+    @property
+    def expected_output_files(self):
+        "No need to cache here: the constructor should extract from the script file (which is always there)"
+        if not self._expected_output_files:
+            raise RuntimeError("something went wrong when parsing %s?" % self.script_path)
+        return self._expected_output_file
+
+    @classmethod
+    def parse_expected_output_files(cls, file_path, expected_keyword='outputFile='):
+        "same as Job.parse_expected_output_file, but with multiple output files"
+        cmd = "grep %s %s" % (expected_keyword, file_path)
+        out = utils.get_command_output(cmd)
+        lines = out['stdout'].split('\n')
+        filenames = []
+        for line in lines:
+            tokens = line.strip().replace('=', ' ').split()
+            output_file = next((t for t in tokens if '.root' in t), '')
+            if output_file and output_file.strip():
+                filenames.append(output_file.strip())
+        return filenames
+
+    @classmethod
+    def merge_scripts(cls, orig_job_script_paths=[], dest_script_path=None):
+        """merge the batch job scripts.
+
+        Modify the first script and plug in the 'run' functions from
+        all the subsequent ones. The subsequent scripts are deleted
+        after the merge.
+        """
+        template_path = orig_job_script_paths[0]
+        template_contents = open(template_path).read()
+        run_function = JobSet.extract_function(template_path, Job.keyword_run_function)
+        main_function = JobSet.extract_function(template_path, Job.keyword_main_function)
+
+        run_functions = [JobSet.extract_function(j, Job.keyword_run_function) for j in orig_job_script_paths]
+        run_functions = [f.replace(Job.keyword_run_function,
+                                   Job.keyword_run_function.replace('run', "run%02d"%i))
+                         for i, f in enumerate(run_functions)]
+        main_function_new = '\n'.join(l if not l.strip()=='run' else
+                                      '\n'.join(l.replace('run', "run%02d"%i)
+                                                for i in range(len(run_functions)))
+                                      for l in main_function.split('\n'))
+        script_contents = (template_contents
+                           .replace(main_function, main_function_new)
+                           .replace(run_function,
+                                    '\n'.join(run_functions)))
+        with open(dest_script_path, 'w') as output_script:
+            output_script.write(script_contents)
+        return dest_script_path
+
+    @classmethod
+    def delete_pre_merge_scripts(cls, file_paths=[]):
+        for f in file_paths:
+            os.remove(f)
+
+    @classmethod
+    def extract_function(cls, script_path, starting_token):
+        """extract the 'run' bash function from the script
+
+        Note to self: cannot use regex because they cannot parse nested structures. 
+ """ + if not ('function' in starting_token and '{' in starting_token): + raise NotImplementedError("invalid starting_token, must contain 'function and ''{': \n''%s'"%starting_token) + function_lines = [] + with open(script_path) as input_file: + contents = input_file.read() + if starting_token in contents: + begin_pos = contents.find(starting_token) + contents = contents[begin_pos:] + scope_counter = 0 + for line in contents.split('\n'): + if line.strip().startswith('#'): + function_lines.append(line) + continue + open_curly_counter = line.count('{') + close_curly_counter = line.count('}') + scope_counter += (open_curly_counter - close_curly_counter) + function_lines.append(line) + if scope_counter==0: + break + return '\n'.join(function_lines) + + @property + def short_batch_file(self): + "use to name the status and log files" + return utils.filename_without_extension(self.script_path) # w/out ext #___________________________________________________________ @@ -144,6 +284,26 @@ class JobManager(object): print "created batch file %s" % batch_filename self.jobs.append(Job(batch_filename)) + def merge_jobs(self, min_n_input_files=10): + "replace jobs with merged jobsets" + jobs = [] + jobsets = [] + number_of_input_files = 0 + for job in self.jobs: + number_of_input_files += job.number_input_files + jobs.append(job) + if number_of_input_files > min_n_input_files: + jobsets.append(JobSet(script_path="%s/jobset%03d.sh"% (self.relative_input_directory, len(jobsets)), + jobs=jobs, + verbose=self.verbose)) + jobs = [] + number_of_input_files = 0 + if number_of_input_files: + jobsets.append(JobSet("%s/jobset%03d.sh"% (self.relative_input_directory, len(jobsets)), + jobs=jobs, + verbose=self.verbose)) + self.jobs = jobsets + def submit_jobs(self): for job in self.jobs: cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, @@ -156,18 +316,29 @@ class JobManager(object): with open(status_path, 'w') as status_file: status_file.write('stdout:\n'+out['stdout']+ 'stderr:\n'+ out['stderr']) + if self.dry_run: + print "This was a dry run. 
To actually submit the jobs run with '--submit'" def check_outputs(self): - counter_any = 0 - counter_done = 0 - counter_not_done = 0 + counter_job_any = 0 + counter_job_done = 0 + counter_job_miss = 0 + counter_files_any = 0 + counter_files_done = 0 for job in self.jobs: - if os.path.exists(job.expected_output_file): - job.ready = True - counter_done += 1 - else: - counter_not_done += 1 - counter_any += 1 - print "Checked %d jobs: %d done, %d missing" % (counter_any, counter_done, counter_not_done) + out_filenames = job.expected_output_files + expect = len(out_filenames) + done = sum(1 for f in out_filenames if os.path.exists(f)) + miss = expect - done + counter_files_any += expect + counter_files_done += done + counter_job_any += 1 + counter_job_done += (1 if expect==done else 0) + counter_job_miss += (0 if expect==done else 1) + print "Checked %d jobs: %d done, %d missing (%d/%d output files)" % (counter_job_any, + counter_job_done, + counter_job_miss, + counter_files_done, + counter_files_any) def check_job(self, sample): # TODO this should probably become obsolete (there's already bjobs) @@ -317,22 +488,26 @@ def guess_rc_release_version(): if __name__=='__main__': print "Testing job manager" - sc_hbsm = HbsmSampleCatalogue() - # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) - sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/hbsm.txt')) - sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) - input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', - base_input_dir='/tmp/gerbaudo/rucio') - input_from_eos = RucioEosCernInterface() - sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) - # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir) - - job_manager = LxbJobManager('test_2016-10-19') - job_manager.queue = '8nh' - job_manager.verbose = True - job_manager.dry_run = True # False - for sample in sc_hbsm.samples: - for systematic in sample.systematic_uncertainties: - for variation in [v for v in systematic.variations if v.name=='nominal']: - job_manager.create_job(sample, systematic, variation) - job_manager.submit_jobs() + # sc_hbsm = HbsmSampleCatalogue() + # # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/*.txt')) + # sc_hbsm.add_samples_from_group_files(glob.glob('VLQAnalysis/data/groups/hbsm/hbsm.txt')) + # sc_hbsm.samples = sc_hbsm.add_systematic_variations(sc_hbsm.samples) + # input_from_dir = LocalDiskInterface(filelist_dir='VLQAnalysis/data/filelist', + # base_input_dir='/tmp/gerbaudo/rucio') + # input_from_eos = RucioEosCernInterface() + # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_eos) + # # sc_hbsm.add_filelists(samples=sc_hbsm.samples, input_interface=input_from_dir) + + # job_manager = LxbJobManager('test_2016-10-19') + # job_manager.queue = '8nh' + # job_manager.verbose = True + # job_manager.dry_run = True # False + # for sample in sc_hbsm.samples: + # for systematic in sample.systematic_uncertainties: + # for variation in [v for v in systematic.variations if v.name=='nominal']: + # job_manager.create_job(sample, systematic, variation) + # job_manager.submit_jobs() + + + JobSet.extract_function(script_path='batch/test_2016-10-24/input/user.mcasolin.341543.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root_nominal.sh', + starting_token = 'function run() {') diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py 
index 27e8856..e4f8a17 100755 --- a/python/hbsm_submit.py +++ b/python/hbsm_submit.py @@ -126,6 +126,7 @@ def main(): # input_interface = sample_catalogues.At3ScratchDiskInterface() sample_catalogue.add_filelists(samples=samples_to_process, input_interface=input_interface) + # TODO create jobs from scripts if already there for sample in samples_to_process: for systematic in sample.systematic_uncertainties: for variation in [v for v in systematic.variations if v.name=='nominal']: @@ -133,6 +134,7 @@ def main(): if opts.check_outputs: job_manager.check_outputs() else: + job_manager.merge_jobs() job_manager.submit_jobs() if __name__=='__main__': -- GitLab From 18f38e2b66d859b3b4e5b371b52c3d54f3de99b4 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 26 Oct 2016 11:00:00 +0200 Subject: [PATCH 25/35] chmod jobset script (amends 16155b1) --- python/batch_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/batch_utils.py b/python/batch_utils.py index 0841d03..c866546 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -122,6 +122,7 @@ class JobSet(object): else: pre_merge_script_paths = [j.script_path for j in jobs] self.script_path = JobSet.merge_scripts(orig_job_script_paths=pre_merge_script_paths, dest_script_path=script_path) + os.chmod(self.script_path, 0755) JobSet.delete_pre_merge_scripts(file_paths=pre_merge_script_paths) self._expected_output_files = JobSet.parse_expected_output_files(script_path) if verbose: -- GitLab From 641c9ee3a5c7747718791c941509375156e4711a Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Wed, 26 Oct 2016 16:19:57 +0200 Subject: [PATCH 26/35] better Job/JobSet handling Details: - if scripts exist, parse them and generate jobs from them - don't use catalogues if using existing scripts - JobSet: configurable threshold to merge - check outputs now implemented for both Job and JobSet - cmd-line option to choose file location --- python/batch_utils.py | 24 +++++++++-- python/hbsm_submit.py | 79 ++++++++++++++++++++++--------------- python/sample_catalogues.py | 24 +++++------ 3 files changed, 81 insertions(+), 46 deletions(-) diff --git a/python/batch_utils.py b/python/batch_utils.py index c866546..db2a676 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -62,6 +62,7 @@ class Job(object): self.script_path = script_path self._expected_output_file = None self._number_input_files = None + @property def short_batch_file(self): "use to name the status and log files" @@ -121,7 +122,8 @@ class JobSet(object): self._expected_output_files = JobSet.parse_expected_output_files(script_path) else: pre_merge_script_paths = [j.script_path for j in jobs] - self.script_path = JobSet.merge_scripts(orig_job_script_paths=pre_merge_script_paths, dest_script_path=script_path) + self.script_path = JobSet.merge_scripts(orig_job_script_paths=pre_merge_script_paths, + dest_script_path=script_path) os.chmod(self.script_path, 0755) JobSet.delete_pre_merge_scripts(file_paths=pre_merge_script_paths) self._expected_output_files = JobSet.parse_expected_output_files(script_path) @@ -133,7 +135,7 @@ class JobSet(object): "No need to cache here: the constructor should extract from the script file (which is always there)" if not self._expected_output_files: raise RuntimeError("something went wrong when parsing %s?" 
% self.script_path) - return self._expected_output_file + return self._expected_output_files @classmethod def parse_expected_output_files(cls, file_path, expected_keyword='outputFile='): @@ -219,7 +221,7 @@ class JobSet(object): class JobManager(object): "Manage a set of jobs; all inputs/outputs will be under batch/<jobset_label>" - def __init__(self, jobset_label, verbose=False, debug=False): + def __init__(self, jobset_label, verbose=False, debug=False, overwrite_batch_scripts=False): self.jobset = jobset_label self.queues = [] self._queue = None @@ -232,9 +234,23 @@ class JobManager(object): self.verbose = verbose self.debug = debug self.overwrite_tar = False - self.overwrite_batch_scripts = False + self.overwrite_batch_scripts = overwrite_batch_scripts self.rc_release_version = guess_rc_release_version() self.create_directories() + existing_scripts = glob.glob(self.relative_input_directory+'/*.sh') + if existing_scripts and not overwrite_batch_scripts: + using_merged_scripts = all('jobset' in f for f in existing_scripts) + using_sample_scripts = all('jobset' not in f for f in existing_scripts) + if using_merged_scripts==using_sample_scripts: + raise NotImplementedError("Cannot handle a mix of merged/unmerged scripts from %s" % self.relative_input_directory) + self.jobs = [JobSet(script_path=f, verbose=self.verbose) if using_merged_scripts else + Job(script_path=f) + for f in sorted(existing_scripts)] + elif verbose: + print "JobManager: you now need to loop over samples/variations and create the jobs" + @property + def needs_to_generate_scripts(self): + return self.overwrite_batch_scripts or not self.jobs def create_job(self, sample, systematic, variation, template_path=None): "This will need access to several specialised attributes (template, dirs, etc.)" diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py index e4f8a17..4b7dc14 100755 --- a/python/hbsm_submit.py +++ b/python/hbsm_submit.py @@ -1,7 +1,5 @@ #!/bin/env python -# TODO add Job class with status - # davide.gerbaudo@gmail.com # Jun 2016 @@ -48,12 +46,9 @@ The rules to go from the templates to the job-specific files are: Example: 'E_{T}{something}' -> 'E_{{T}}{{something}}' This can be done either in the template or in this script. -TODO check job output TODO split large jobs -TODO merge large jobs -TODO check root outpout -TODO resubmit broken jobs TODO merge split outputs +TODO resubmit broken jobs TODO just-print-exe """ @@ -77,6 +72,9 @@ def main(): parser.add_option('--generate-groupfiles', action='store_true', help='generate group files') parser.add_option('--generate-filelists', action='store_true', help='generate input file lists') parser.add_option('--groupfile', default=None, help='if you just want to run on one group file, eg. 
data/groups/hbsm/hbsm.txt') + parser.add_option('--input-from', default='rucioeos', + help='Where the ntuples are stored; see sample_catalogues.InputDataInterface') + parser.add_option('--merge-fewer', default=1, type=int, help='merge jobs if less than N input files') parser.add_option('-v', '--verbose', action='store_true', help='print what it is doing') parser.add_option('-d', '--debug', action='store_true', help='print even more debugging information') @@ -85,18 +83,21 @@ def main(): parser.error('You must provide a label option') if opts.label and opts.label[0].isdigit(): parser.error('Label cannot start with a digit') + if not is_valid_input(opts): + parser.error('Invalid --input-from') if opts.list_systematics: systematics.SystematicCatalogue().print_all() return + batch_platform = batch_utils.guess_batch_platform() job_manager = (batch_utils.At3JobManager if batch_platform=='at3' else batch_utils.LxbJobManager) - job_manager = job_manager(jobset_label=opts.label, verbose=opts.verbose, debug=opts.debug) + job_manager = job_manager(jobset_label=opts.label, verbose=opts.verbose, debug=opts.debug, + overwrite_batch_scripts=opts.overwrite_scripts) job_manager.dry_run = not opts.submit if opts.queue: job_manager.queue = opts.queue if opts.batch_template: job_manager.template_path = opts.batch_template if opts.overwrite_tar: job_manager.overwrite_tar = True - if opts.overwrite_scripts: job_manager.overwrite_batch_scripts = True if opts.verbose: utils.print_running_conditions(parser, opts) print "Preparing jobs using the following template: %s" % job_manager.template_path @@ -109,33 +110,49 @@ def main(): sample_catalogue.categorise_samples(sample_catalogue.samples) sample_catalogue.write_group_files() return - sample_catalogue.add_samples_from_group_files(glob.glob(opts.groupfile) if opts.groupfile else - glob.glob(sample_catalogue.groupfiles_directory+'/*.txt')) - sample_catalogue.prune_samples(regex_include=opts.sample_include, - regex_exclude=opts.sample_exclude) - samples_to_process = (sample_catalogue.samples if opts.syst=='nominal' else - sample_catalogue.add_systematic_variations(samples=sample_catalogue.samples, - verbose=opts.verbose, - syst_option=opts.syst)) - - if opts.generate_filelists: - pass - input_interface = sample_catalogues.RucioEosCernInterface() - # TODO ask where we should read the files from - # input_interface = sample_catalogues.RucioPnfsIfaeInterface() - # input_interface = sample_catalogues.At3ScratchDiskInterface() - sample_catalogue.add_filelists(samples=samples_to_process, input_interface=input_interface) - - # TODO create jobs from scripts if already there - for sample in samples_to_process: - for systematic in sample.systematic_uncertainties: - for variation in [v for v in systematic.variations if v.name=='nominal']: - job_manager.create_job(sample, systematic, variation) + if job_manager.needs_to_generate_scripts: + # if we need to generate scripts we need samples and filelists + sample_catalogue.add_samples_from_group_files(glob.glob(opts.groupfile) if opts.groupfile else + glob.glob(sample_catalogue.groupfiles_directory+'/*.txt')) + sample_catalogue.prune_samples(regex_include=opts.sample_include, + regex_exclude=opts.sample_exclude) + samples_to_process = (sample_catalogue.samples if opts.syst=='nominal' else + sample_catalogue.add_systematic_variations(samples=sample_catalogue.samples, + verbose=opts.verbose, + syst_option=opts.syst)) + + if opts.generate_filelists: + pass + input_interface = (sample_catalogues.RucioEosCernInterface() if 
opts.input_from=='rucioeos' else
+                           sample_catalogues.EosUserInterface() if opts.input_from=='eosuser' else
+                           sample_catalogues.RucioPnfsIfaeInterface() if opts.input_from=='ruciopnfs' else
+                           sample_catalogues.At3ScratchDiskInterface() if opts.input_from=='at3disk' else
+                           sample_catalogues.LocalDiskInterface('VLQAnalysis/data/filelist/', base_input_dir=opts.input_from))
+
+        sample_catalogue.add_filelists(samples=samples_to_process, input_interface=input_interface)
+
+        for sample in samples_to_process:
+            for systematic in sample.systematic_uncertainties:
+                for variation in [v for v in systematic.variations if v.name=='nominal']:
+                    job_manager.create_job(sample, systematic, variation)
+        if opts.merge_fewer>1:
+            job_manager.merge_jobs(min_n_input_files=opts.merge_fewer)
+    else:
+        if opts.verbose:
+            print "JobManager: using existing scripts from %s" % job_manager.relative_input_directory
+        # TODO (perhaps: need to think about expected behaviour) filter samples?
+        # Q: how would you filter on merged samples? on the jobset name or on the sample name
     if opts.check_outputs:
         job_manager.check_outputs()
     else:
-        job_manager.merge_jobs()
         job_manager.submit_jobs()
+
+def is_valid_input(opts):
+    "either you specified a predefined input interface, or a directory"
+    return (opts.input_from in ['at3disk', 'eosuser', 'rucioeos', 'ruciopnfs'] or
+            os.path.isdir(opts.input_from))
+
+
 if __name__=='__main__':
     main()
diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index b1cbeb0..94b8eff 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -596,7 +596,7 @@ class LocalDiskInterface(InputDataInterface):
     If there is no filelist just generate it.
     It assumes that there is one sub-directory for each container.
     """
-    def __init__(self, filelist_dir, base_input_dir):
+    def __init__(self, filelist_dir=None, base_input_dir=None):
         super(LocalDiskInterface, self).__init__(filelist_dir)
         self.base_input_dir = base_input_dir
 
@@ -618,6 +618,18 @@ class LocalDiskInterface(InputDataInterface):
         if not os.path.exists(filelist_path):
             self.generate_filelist(container)
         return filelist_path
+
+#___________________________________________________________
+
+class At3ScratchDiskInterface(LocalDiskInterface):
+    """Data downloaded to the scratch2 disk on at3.
+    Currently 00-10 production.
+    """
+    def __init__(self,
+                 filelist_dir='VLQAnalysis/data/hbsm/filelist/at3pnfs',
+                 base_input_dir='/nfs/at3/scratch2/lvalery/VLQFiles/AT-00-00-10/'):
+        super(At3ScratchDiskInterface, self).__init__(filelist_dir, base_input_dir)
+
 #___________________________________________________________
 
 class EosUserInterface(InputDataInterface):
@@ -737,16 +749,6 @@ class RucioPnfsIfaeInterface(RseInterface):
                  root_prefix_placeholder='/pnfs/pic.es'):
         super(RucioPnfsIfaeInterface, self).__init__(filelist_dir, rse, root_prefix, root_prefix_placeholder)
 
-class At3ScratchDiskInterface(LocalDiskInterface):
-    """Data downloaded to the scratch2 disk on at3.
-    Currently 00-10 production.
-    """
-    def __init__(self,
-                 filelist_dir='VLQAnalysis/data/hbsm/filelist/at3pnfs',
-                 base_input_dir='/nfs/at3/scratch2/lvalery/VLQFiles/AT-00-00-10/'):
-        super(At3ScratchDiskInterface, self).__init__(filelist_dir, base_input_dir)
-
-
 #___________________________________________________________
 
 if __name__=='__main__':
-- 
GitLab


From 3341ca3cdac94116098a992d5870e5d1a7c83a7b Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Thu, 27 Oct 2016 10:08:47 +0200
Subject: [PATCH 27/35] implement option --print-local

This is to print the command that one needs to run locally to test
the code.
It works as expected, although there is still some issue with the
sample filtering: even when I specify --sample-include, I get all
samples unless I also pass --overwrite-scripts.
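The printed command is reconstructed from the `run` function of each
job script; the new `utils.drop_continuation_lines` helper first joins
the backslash-continued lines. A minimal sketch of the intended
behaviour (made-up input lines, not from a real script):

    from VLQAnalysis.utils import drop_continuation_lines
    lines = ['VLQAnalysis \\',
             '  --inputFile=filelist.txt \\',
             '  --isData false']
    drop_continuation_lines(lines)
    # -> ['VLQAnalysis   --inputFile=filelist.txt   --isData false']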
- """ - def __init__(self, - filelist_dir='VLQAnalysis/data/hbsm/filelist/at3pnfs', - base_input_dir='/nfs/at3/scratch2/lvalery/VLQFiles/AT-00-00-10/'): - super(At3ScratchDiskInterface, self).__init__(filelist_dir, base_input_dir) - - #___________________________________________________________ if __name__=='__main__': -- GitLab From 3341ca3cdac94116098a992d5870e5d1a7c83a7b Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Thu, 27 Oct 2016 10:08:47 +0200 Subject: [PATCH 27/35] implement option --print-local This is to print the command that one needs to run locally to test the code. It works as expected, although there is still some issue with the sample filtering: even when I specify --sample-include, I get all samples except if --overwrite-script. --- python/batch_utils.py | 17 +++++++++++++++-- python/hbsm_submit.py | 19 ++++++++++++++++--- python/utils.py | 16 ++++++++++++++++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/python/batch_utils.py b/python/batch_utils.py index db2a676..9769054 100644 --- a/python/batch_utils.py +++ b/python/batch_utils.py @@ -58,6 +58,7 @@ class Job(object): keyword_output_file = 'outputFile=' keyword_run_function = 'function run() {' keyword_main_function = 'function main() {' + keyword_exe_line = 'VLQAnalysis' def __init__(self, script_path): self.script_path = script_path self._expected_output_file = None @@ -75,6 +76,15 @@ class Job(object): output_file = file_path = next((t for t in tokens if '.root' in t), '') output_file = output_file.strip() return output_file + + @classmethod + def parse_run_cmd_line(cls, file_path): + run_function = JobSet.extract_function(file_path, Job.keyword_run_function) + run_line = ' '.join(l for l in utils.drop_continuation_lines(run_function.split('\n')) + # if Job.keyword_run_function in l # TODO I think should be needed, but it works without ????? 
+ ) + return run_line + @property def expected_output_file(self): if not self._expected_output_file: @@ -525,6 +535,9 @@ if __name__=='__main__': # job_manager.create_job(sample, systematic, variation) # job_manager.submit_jobs() + print 'testing JobSet.extract_function' + print JobSet.extract_function(script_path='batch/test_2016-10-24/input/user.mcasolin.341543.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root_nominal.sh', + starting_token=Job.keyword_run_function) - JobSet.extract_function(script_path='batch/test_2016-10-24/input/user.mcasolin.341543.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root_nominal.sh', - starting_token = 'function run() {') + print 'testing Job.parse_run_cmd_line' + print Job.parse_run_cmd_line('batch/test_2016-10-26b/input/user.mcasolin.341541.aMcAtNloPythia8EvtGen.DAOD_TOPQ1.e4336_a766_a821_r7676_p2669.HtX4Tops_00-00-12_out.root_nominal.sh') diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py index 4b7dc14..1ea5133 100755 --- a/python/hbsm_submit.py +++ b/python/hbsm_submit.py @@ -22,9 +22,13 @@ First time from a new area: Then: +# prepare scripts %prog -l test_2016-10-19 +# just nominal, only hbsm samples, specify queue %prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --verbose - +# command to test the code locally on one sample +%prog -l test_2016-10-27 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --sample-include 341541 --print-local +# check the output root files %prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --check-outputs (run with -h to get all options) @@ -49,7 +53,6 @@ The rules to go from the templates to the job-specific files are: TODO split large jobs TODO merge split outputs TODO resubmit broken jobs -TODO just-print-exe """ def main(): @@ -57,11 +60,13 @@ def main(): parser = optparse.OptionParser(description=description, usage=usage) parser.add_option('-l', '--label', default=None, help='job label; used to make input/output subdirectories') parser.add_option('-q', '--queue', default=None) + # TODO fix syst option parser.add_option('-s', '--syst', default='nominal', help="variations to process ('weight', 'object', default %default).") parser.add_option('--list-systematics', action='store_true', help='list the systematics available in the catalogue') # parser.add_option('--syst-include', default='.*', help='include only the systematics matching the regexp') # parser.add_option('--syst-exclude', default=None, help='exclude the systematics matching the regexp') parser.add_option('--check-outputs', action='store_true', help='check the root output files') # TODO check and resubmit + # TODO the sample filtering works only with the --overwrite-scripts option? 
parser.add_option('--sample-include', default='.*', help='include only the samples matching the regexp (short name if available, else full_name)')
     parser.add_option('--sample-exclude', default=None, help='exclude the samples matching the regexp (short name if available, else full_name)')
     parser.add_option('-S', '--submit', action='store_true', help='actually submit the jobs')
@@ -75,6 +80,7 @@ def main():
     parser.add_option('--input-from', default='rucioeos',
                       help='Where the ntuples are stored; see sample_catalogues.InputDataInterface')
     parser.add_option('--merge-fewer', default=1, type=int, help='merge jobs if less than N input files')
+    parser.add_option('--print-local', action='store_true', help='print the command to run locally')
     parser.add_option('-v', '--verbose', action='store_true', help='print what it is doing')
     parser.add_option('-d', '--debug', action='store_true', help='print even more debugging information')
 
@@ -111,7 +117,8 @@ def main():
     if job_manager.needs_to_generate_scripts:
-        # if we need to generate scripts we need samples and filelists
+        if opts.verbose:
+            print "Need to generate scripts: gathering samples and filelists"
         sample_catalogue.add_samples_from_group_files(glob.glob(opts.groupfile) if opts.groupfile else
                                                       glob.glob(sample_catalogue.groupfiles_directory+'/*.txt'))
         sample_catalogue.prune_samples(regex_include=opts.sample_include,
@@ -137,6 +144,12 @@ def main():
                     job_manager.create_job(sample, systematic, variation)
         if opts.merge_fewer>1:
             job_manager.merge_jobs(min_n_input_files=opts.merge_fewer)
+        if opts.print_local:
+            if not all(type(j) is batch_utils.Job for j in job_manager.jobs):
+                raise NotImplementedError("This feature is available only for non-merged jobs; drop the --merge-fewer option")
+            print 'Commands to test the code locally:'
+            print '\n'.join(batch_utils.Job.parse_run_cmd_line(j.script_path) for j in job_manager.jobs)
+            return
     else:
         if opts.verbose:
             print "JobManager: using existing scripts from %s" % job_manager.relative_input_directory
diff --git a/python/utils.py b/python/utils.py
index 20b6d2f..c0b7206 100644
--- a/python/utils.py
+++ b/python/utils.py
@@ -139,6 +139,22 @@ def filename_without_extension(filename):
         raise IOError("'%s' is not a file" % filename)
     return os.path.splitext(os.path.basename(filename))[0]
 
+def drop_continuation_lines(split_lines=[]):
+    """remove continuation characters '\' from split lines
+
+    Adapted from
+    http://stackoverflow.com/questions/16480495/read-a-file-with-line-continuation-characters-in-python
+    """
+    out_lines = []
+    current_line = ''
+    for line in split_lines:
+        line = line.rstrip('\n')
+        if line.endswith('\\'):
+            current_line += line[:-1]
+        else:
+            out_lines.append(current_line + line)
+            current_line = ''
+    return out_lines
 #
 # testing
 #
-- 
GitLab


From e85c6c7d68fe89aa6c5674ae018198f87262861e Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Thu, 27 Oct 2016 15:28:29 +0200
Subject: [PATCH 28/35] implement --resubmit

And other minor improvements
- move some printout from 'verbose' to 'debug' (easier to follow
  what's being done)
- quieter mkdir_if_needed
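The core of the resubmission is just a filter on the expected outputs;
schematically (an illustrative sketch, not the actual implementation):

    import os

    def unfinished(jobs):
        "keep only the jobs that still miss at least one expected output file"
        return [j for j in jobs
                if any(not os.path.exists(f) for f in j.expected_output_files)]

    # then: job_manager.jobs = unfinished(job_manager.jobs); job_manager.submit_jobs()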
---
 python/batch_utils.py | 38 +++++++++++++++++++++++++-------------
 python/hbsm_submit.py | 29 +++++++++++++++++------------
 python/utils.py       |  5 ++++-
 3 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index 9769054..5530459 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -336,7 +336,7 @@ class JobManager(object):
             cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose,
                                               base_dir=self.absolute_base_dir, job=job)
             if self.verbose:
-                print 'cmd: ',cmd
+                print cmd
             if not self.dry_run:
                 out = utils.get_command_output(cmd)
                 status_path = os.path.join(self.relative_status_directory, job.short_batch_file+'.submitted')
@@ -361,15 +361,29 @@ class JobManager(object):
             counter_job_any += 1
             counter_job_done += (1 if expect==done else 0)
             counter_job_miss += (0 if expect==done else 1)
-        print "Checked %d jobs: %d done, %d missing (%d/%d output files)" % (counter_job_any,
-                                                                             counter_job_done,
-                                                                             counter_job_miss,
-                                                                             counter_files_done,
-                                                                             counter_files_any)
-
-    def check_job(self, sample):
-        # TODO this should probably become obsolete (there's already bjobs)
-        raise NotImplementedError("todo")
+        print "Checked %d jobs: %d done, %d missing" % (counter_job_any, counter_job_done, counter_job_miss)
+        print "\t (%d/%d output files)" % (counter_files_done, counter_files_any)
+
+    def resubmit_failed_jobs(self):
+        """For merged jobs, now resubmitting the whole script (even when partially done)
+
+        TODO implement a smarter JobSet resubmit where only the
+        required 'runN' functions are called, and the others are
+        commented out.
+        """
+        self.check_outputs()
+        unfinished_jobs = []
+        for job in self.jobs:
+            out_filenames = job.expected_output_files
+            expect = len(out_filenames)
+            done = sum(1 for f in out_filenames if os.path.exists(f))
+            if done < expect:
+                unfinished_jobs.append(job)
+        self.jobs = unfinished_jobs
+        if self.verbose:
+            print "about to resubmit %d failed jobs" % len(self.jobs)
+        self.submit_jobs()
+
     @property
     def relative_input_directory(self):
         "where the job script files will be generated"
@@ -386,9 +400,7 @@ class JobManager(object):
     def create_directories(self):
         for d in [self.relative_input_directory, self.relative_log_directory,
                   self.relative_output_directory, self.relative_status_directory]:
-            dir_path = utils.mkdir_if_needed(d)
-            if self.verbose:
-                print "created %s" % dir_path
+            dir_path = utils.mkdir_if_needed(d, verbose=self.verbose)
     def job_label(self, sample_name=None, variation_name=None):
         "The label used to distinguish one job from another."
job_label = sample_name+'_'+variation_name diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py index 1ea5133..44a8cfd 100755 --- a/python/hbsm_submit.py +++ b/python/hbsm_submit.py @@ -29,7 +29,7 @@ Then: # command to test the code locally on one sample %prog -l test_2016-10-27 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --sample-include 341541 --print-local # check the output root files -%prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --check-outputs +%prog -l test_2016-10-19 --groupfile VLQAnalysis/data/groups/hbsm/hbsm.txt --syst nominal --queue 8nh --check (run with -h to get all options) @@ -52,7 +52,6 @@ The rules to go from the templates to the job-specific files are: TODO split large jobs TODO merge split outputs -TODO resubmit broken jobs """ def main(): @@ -65,7 +64,8 @@ def main(): parser.add_option('--list-systematics', action='store_true', help='list the systematics available in the catalogue') # parser.add_option('--syst-include', default='.*', help='include only the systematics matching the regexp') # parser.add_option('--syst-exclude', default=None, help='exclude the systematics matching the regexp') - parser.add_option('--check-outputs', action='store_true', help='check the root output files') # TODO check and resubmit + parser.add_option('--check', action='store_true', help='check the root output files') + parser.add_option('--resubmit', action='store_true', help='resubmit failed jobs') # TODO the sample filtering works only with the --overwrite-scripts option? parser.add_option('--sample-include', default='.*', help='include only the samples matching the regexp (short name if available, else full_name)') parser.add_option('--sample-exclude', default=None, help='include only the samples matching the regexp (short name if available, else full_name)') @@ -91,6 +91,8 @@ def main(): parser.error('Label cannot start with a digit') if not is_valid_input(opts): parser.error('Invalid --input-from') + if opts.resubmit and opts.overwrite_scripts: + parser.error('These two options are not compatible: --resubmit --overwrite-scripts') if opts.list_systematics: systematics.SystematicCatalogue().print_all() return @@ -106,11 +108,11 @@ def main(): if opts.overwrite_tar: job_manager.overwrite_tar = True if opts.verbose: utils.print_running_conditions(parser, opts) - print "Preparing jobs using the following template: %s" % job_manager.template_path - - sample_catalogue = sample_catalogues.HbsmSampleCatalogue() # TODO or VlqSampleCatalogue + if opts.debug: + utils.print_parsed_options(parser, opts) if opts.generate_groupfiles: + sample_catalogue = sample_catalogues.HbsmSampleCatalogue() # TODO or VlqSampleCatalogue # TODO prompt: ask about sample list from new production sample_catalogue.add_samples_from_file(path='VLQAnalysis/data/samples_HtX4TopsNtuple-00-00-12.txt') sample_catalogue.categorise_samples(sample_catalogue.samples) @@ -119,6 +121,7 @@ def main(): if job_manager.needs_to_generate_scripts: if opts.verbose: print "Need to generate scripts: gathering samples and filelists" + sample_catalogue = sample_catalogues.HbsmSampleCatalogue() # TODO or VlqSampleCatalogue sample_catalogue.add_samples_from_group_files(glob.glob(opts.groupfile) if opts.groupfile else glob.glob(sample_catalogue.groupfiles_directory+'/*.txt')) sample_catalogue.prune_samples(regex_include=opts.sample_include, @@ -144,19 +147,21 @@ def main(): job_manager.create_job(sample, systematic, variation) if opts.merge_fewer>1: 
job_manager.merge_jobs(min_n_input_files=opts.merge_fewer)
+        else:
+            if opts.verbose:
+                print "JobManager: using existing scripts from %s" % job_manager.relative_input_directory
+            # TODO (perhaps: need to think about expected behaviour) filter samples?
+            # Q: how would you filter on merged samples? on the jobset name or on the sample name
         if opts.print_local:
             if not all(type(j) is batch_utils.Job for j in job_manager.jobs):
                 raise NotImplementedError("This feature is available only for non-merged jobs; drop the --merge-fewer option")
             print 'Commands to test the code locally:'
             print '\n'.join(batch_utils.Job.parse_run_cmd_line(j.script_path) for j in job_manager.jobs)
             return
-        else:
-            if opts.verbose:
-                print "JobManager: using existing scripts from %s" % job_manager.relative_input_directory
-            # TODO (perhaps: need to think about expected behaviour) filter samples?
-            # Q: how would you filter on merged samples? on the jobset name or on the sample name
-    if opts.check_outputs:
+    if opts.check:
         job_manager.check_outputs()
+    elif opts.resubmit:
+        job_manager.resubmit_failed_jobs()
     else:
         job_manager.submit_jobs()
 
diff --git a/python/utils.py b/python/utils.py
index c0b7206..2a51c46 100644
--- a/python/utils.py
+++ b/python/utils.py
@@ -80,12 +80,13 @@ def json_read(fname) :
 def rm_if_exists(filename) :
     if os.path.exists(filename) :
         os.remove(filename)
 
-def mkdir_if_needed(dirname) :
+def mkdir_if_needed(dirname, verbose=False) :
     dest_dir = None
     if os.path.exists(dirname) and os.path.isdir(dirname) :
         dest_dir = dirname
     elif not os.path.exists(dirname) :
         os.makedirs(dirname)
+        if self.verbose: print "created %s" % dir_path
         dest_dir = dirname
     return dest_dir
 
@@ -121,6 +122,8 @@ def remove_duplicates(seq=[]) :
 def print_running_conditions(parser, opts):
     print "working from {0}".format(os.getcwd())
     print "being called as : {0}".format(' '.join(os.sys.argv))
+
+def print_parsed_options(parser, opts):
     all_options = [x.dest for x in parser._get_all_options()[1:]]
     print "options parsed:\n"+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in all_options)
-- 
GitLab


From 989550a9a3de628216f0d469fd0ba009ddce9069 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Thu, 3 Nov 2016 18:09:12 +0100
Subject: [PATCH 29/35] fix mkdir_if_needed

---
 python/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/utils.py b/python/utils.py
index 2a51c46..e4a33b0 100644
--- a/python/utils.py
+++ b/python/utils.py
@@ -86,7 +86,7 @@ def mkdir_if_needed(dirname, verbose=False) :
         dest_dir = dirname
     elif not os.path.exists(dirname) :
         os.makedirs(dirname)
-        if self.verbose: print "created %s" % dir_path
+        if verbose: print "created %s" % dirname
         dest_dir = dirname
     return dest_dir
-- 
GitLab


From 20abfc5a7917102c8574fbf650ab2ec55f7839b9 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Thu, 3 Nov 2016 18:32:15 +0100
Subject: [PATCH 30/35] update templates for splitting

Details:
Avoid `mkdir` in the prepare function.
When splitting a job into subjobs, we want to keep the `prepare` and
update the `run`. Since the subdirectories where the output will be
written are specific to each sub-job, the `mkdir` must be in the `run`.
This is also a cleaner flow, where `prepare` just does that (extract tar and compile) --- data/hbsm/batch/templates/cern/one_lep_nom.sh | 18 +++++++----------- data/hbsm/batch/templates/ifae/one_lep_nom.sh | 18 +++++++----------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/data/hbsm/batch/templates/cern/one_lep_nom.sh b/data/hbsm/batch/templates/cern/one_lep_nom.sh index 9cd8f84..bff9eec 100644 --- a/data/hbsm/batch/templates/cern/one_lep_nom.sh +++ b/data/hbsm/batch/templates/cern/one_lep_nom.sh @@ -34,21 +34,17 @@ function prepare() {{ echo "untar tarball.tgz" ls -lh tarball.tgz tar xzf tarball.tgz - - mkdir -p {relative_log_dir:s} - mkdir -p {relative_output_dir:s} - local subtask_log_file={relative_log_dir:s}/compile_{job_label:s}.log - echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1 - rc find_packages >> ${{subtask_log_file}} 2>&1 - rc clean >> ${{subtask_log_file}} 2>&1 - rc compile >> ${{subtask_log_file}} 2>&1 - echo "Completed 'compile' step `date`" >> ${{subtask_log_file}} 2>&1 - ls -ltrh {relative_log_dir:s}/* - rsync -az {relative_log_dir:s}/* {absolute_base_dir:s}/{relative_log_dir:s} + echo "Starting 'compilation' step `date`" + rc find_packages + rc clean + rc compile + echo "Completed 'compile' step `date`" }} function run() {{ echo "Processing {sample_name:s} `date`" + mkdir -p {relative_log_dir:s} + mkdir -p {relative_output_dir:s} subtask_log_file={relative_log_dir:s}/run_{job_label:s}.log VLQAnalysis \ --outputFile={relative_output_dir:s}/{output_file:s} \ diff --git a/data/hbsm/batch/templates/ifae/one_lep_nom.sh b/data/hbsm/batch/templates/ifae/one_lep_nom.sh index c3bdc71..8ffe8d5 100644 --- a/data/hbsm/batch/templates/ifae/one_lep_nom.sh +++ b/data/hbsm/batch/templates/ifae/one_lep_nom.sh @@ -31,21 +31,17 @@ function prepare() {{ echo "untar tarball.tgz" ls -lh tarball.tgz tar xzf tarball.tgz - - mkdir -p {relative_log_dir:s} - mkdir -p {relative_output_dir:s} - local subtask_log_file={relative_log_dir:s}/compile_{job_label:s}.log - echo "Starting 'compilation' step `date`" >> ${{subtask_log_file}} 2>&1 - rc find_packages >> ${{subtask_log_file}} 2>&1 - rc clean >> ${{subtask_log_file}} 2>&1 - rc compile >> ${{subtask_log_file}} 2>&1 - echo "Completed 'compile' step `date`" >> ${{subtask_log_file}} 2>&1 - ls -ltrh {relative_log_dir:s}/* - rsync -az {relative_log_dir:s}/* {absolute_base_dir:s}/{relative_log_dir:s} + echo "Starting 'compilation' step `date`" + rc find_packages + rc clean + rc compile + echo "Completed 'compile' step `date`" }} function run() {{ echo "Processing {sample_name:s} `date`" + mkdir -p {relative_log_dir:s} + mkdir -p {relative_output_dir:s} local subtask_log_file={relative_log_dir:s}/run_{job_label:s}.log VLQAnalysis \ --outputFile={relative_output_dir:s}/{output_file:s} \ -- GitLab From 804f84eb6f7102e4741c50b1f35000185830ecc0 Mon Sep 17 00:00:00 2001 From: gerbaudo <davide.gerbaudo@gmail.com> Date: Thu, 3 Nov 2016 18:40:46 +0100 Subject: [PATCH 31/35] SystematicCatalogue: when systematics queried, return their copies Otherwise different samples will share the same variations. Bugfix. 
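The aliasing issue in a nutshell (a simplified stand-in for the real
classes, with made-up filelist names):

    import copy

    class Variation(object):
        def __init__(self, input_tree):
            self.input_tree = input_tree
            self.filelist = None  # attached per sample later

    nominal = Variation('nominal_Loose')
    var_a = nominal  # both samples alias the same object
    var_b = nominal
    var_a.filelist = 'filelist_A.txt'
    var_b.filelist = 'filelist_B.txt'
    assert var_a.filelist == 'filelist_B.txt'  # A was clobbered

    var_a = copy.deepcopy(nominal)  # with copies, each sample keeps its own state
    var_b = copy.deepcopy(nominal)
    var_a.filelist = 'filelist_A.txt'
    var_b.filelist = 'filelist_B.txt'
    assert var_a.filelist == 'filelist_A.txt'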
---
 python/sample_catalogues.py |  2 +-
 python/systematics.py       | 27 +++++++++++++++++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 94b8eff..8333c46 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -87,7 +87,7 @@ class Sample(object):
         self._group = group
         self.job_options = job_options
         self.ds_input = ds_input
-        self.systematic_uncertainties = [catalogue.nominal]
+        self.systematic_uncertainties = [catalogue.nominal()]
         if not full_name:
             raise ValueError("Sample must have full_name")
 
diff --git a/python/systematics.py b/python/systematics.py
index 3e91689..3d6f894 100644
--- a/python/systematics.py
+++ b/python/systematics.py
@@ -9,6 +9,8 @@ davide.gerbaudo@gmail.com
 Jul 2016
 """
 
+import copy
+
 class Variation(object):
     treename_suffix = '_Loose' # this is the suffix that should be removed from the output filenames
     def __init__(self, input_tree):
@@ -62,8 +64,18 @@ class SystematicUncertainty(object):
         return self.name=='nominal'
 
 class SystematicCatalogue(object):
+    """A catalogue of uncertainties
+
+    Note: when a user asks for the uncertainties (and their
+    variations), we always provide a copy. The reason for this is that
+    they will be attached to samples, so we want to have different
+    copies of the same Variation objects for each sample.
+
+    Note to self: perhaps I should also hide ('._') all the remaining
+    attributes (such as electron_object_uncertainties etc.).
+    """
     def __init__(self):
-        self.nominal = SystematicUncertainty(name='nominal', variations=[Variation(input_tree='nominal_Loose')])
+        self._nominal = SystematicUncertainty(name='nominal', variations=[Variation(input_tree='nominal_Loose')])
         self.electron_object_uncertainties = [
             SystematicUncertainty(name='EG_RESOLUTION_ALL',
                                   variations=[ObjectVariation(input_tree='EG_RESOLUTION_ALL__1up_Loose'),
@@ -152,16 +164,19 @@ class SystematicCatalogue(object):
         ]
 
     def object_uncertainties(self):
-        return (self.electron_object_uncertainties +
-                self.jet_object_uncertainties +
-                self.met_object_uncertainties +
-                self.muon_object_uncertainties)
+        return copy.deepcopy(self.electron_object_uncertainties +
+                             self.jet_object_uncertainties +
+                             self.met_object_uncertainties +
+                             self.muon_object_uncertainties)
+
+    def nominal(self):
+        return copy.deepcopy(self._nominal)
 
     def weight_uncertainties(self):
         raise NotImplementedError("SystematicCatalogue: weight_uncertainties not there yet")
 
     def all_uncertainties(self):
-        return [self.nominal] + self.object_uncertainties() # + self.weight_uncertainties()
+        return [self.nominal()] + self.object_uncertainties() # + self.weight_uncertainties()
 
     def print_all(self):
         print "SystematicCatalogue:\n"+'\n'.join([s.name for s in self.all_uncertainties()])
-- 
GitLab


From e9a8d7ff559eea6787a925aa85d982ef9a691a6c Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Thu, 3 Nov 2016 18:44:44 +0100
Subject: [PATCH 32/35] improvements SampleCatalogue

- make keyword_job_option static (avoid repetition)
- SampleCatalogue.add_filelists --> InputDataInterface.attach_filelists
---
 python/sample_catalogues.py | 48 +++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/python/sample_catalogues.py b/python/sample_catalogues.py
index 8333c46..97a25bf 100644
--- a/python/sample_catalogues.py
+++ b/python/sample_catalogues.py
@@ -146,6 +146,8 @@ class SampleCatalogue(object):
 
     TODO attach syst uncertainties to samples
     """
+    keyword_job_option = 'config:' 
# same convention as in MultibjetsAnalysis + def __init__(self): self.samples = [] self.verbose = False @@ -168,7 +170,7 @@ class SampleCatalogue(object): generated from HtX4TopsNtuple. """ job_options = None - keyword_job_option = 'config:' # same convention as in MultibjetsAnalysis + keyword_job_option = SampleCatalogue.keyword_job_option for line in SampleCatalogue.read_lines_from_txt(path, keywords_useful_comment_line=[keyword_job_option]): if keyword_job_option in line: job_options = line[line.find(keyword_job_option)+len(keyword_job_option):].strip() @@ -211,26 +213,6 @@ class SampleCatalogue(object): def add_systematic_variations(cls, samples): raise NotImplementedError("This operation depends on the analysis," " and it is implemented only in the specialised catalogues") - def add_filelists(self, samples=None, input_interface=None): - "Attach a filelist to each one of the input samples x variations" - samples = self.samples if not samples else samples - if self.verbose: - print "About to check/create filelists for %d samples; this might take some time." % len(samples) - for sample in samples: - for uncertainty in sample.systematic_uncertainties: - for variation in uncertainty.variations: - filelist = input_interface.generate_filelist(sample.full_name) - variation.filelist = filelist - if self.verbose: - print "%s %s %s added filelist (%d files) %s" - - # note to self: here sample knows about the - # container name, and variation knows about the - # treename. It assumes that the object variation - # trees are in the same file as the nominal one. - # I might need to revise this when we start using - # systematic samples? - @property def uncategorised_samples(self): return [s for s in self.samples if not s.group] @@ -589,6 +571,30 @@ class InputDataInterface(object): raise NotImplementedError("Should be implemented in specialised classes") def filelist(self, container): raise NotImplementedError("Should be implemented in specialised classes") + + def attach_filelists(self, samples=[], verbose=False): + "Attach a filelist to each one of the input samples x variations" + if verbose: + print "About to check/create filelists for %d samples; this might take some time." % len(samples) + for sa in samples: + for su in sa.systematic_uncertainties: + for v in su.variations: + v.filelist = str(self.generate_filelist(sa.full_name)) + if verbose: + n_files = sum(1 for l in open(v.filelist).readlines() if not l.strip().startswith('#')) + print "%s added filelist (%d files) %s %s" % (self.filelist_dir, + n_files, + sa.full_name, + '', #sys not needed for now + ) + # note to self: here sample knows about the + # container name, and variation knows about the + # treename. It assumes that the object variation + # trees are in the same file as the nominal one. + # I might need to revise this when we start using + # systematic samples? 
+        # return samples
 
 #___________________________________________________________
 
 class LocalDiskInterface(InputDataInterface):
-- 
GitLab


From 2a4826504d042cec00056d6d0ae8593c6743b27c Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Fri, 4 Nov 2016 12:48:39 +0100
Subject: [PATCH 33/35] Add feature to split large jobs

Details:
- SampleCatalogue.add_filelists --> InputDataInterface.attach_filelists
- Sample add nickname
- add Job.has_multiple_configuration_lines (check whether filelist can be split)
- add Job.parse_input_file (extract input filelist)
- add Job.keyword_input_line
- add Job.nickname (was short_batch_file, used to submit split jobs
  with reasonable job names and log files)
- add JobSplit
- JobManager: add split_jobs
- JobManager.create_directories: add subdir option
- add SplitMap to job manager (keeps track of which jobs are split to
  which subjobs)
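The splitting itself is a plain chunking of the filelist; roughly
(illustrative file names, not from a real production):

    lines = ['f1.root', 'f2.root', 'f3.root', 'f4.root', 'f5.root']
    max_n = 2
    chunks = [lines[i:i+max_n] for i in range(0, len(lines), max_n)]
    # -> [['f1.root', 'f2.root'], ['f3.root', 'f4.root'], ['f5.root']]

Each chunk is written to its own sub-filelist, and the `run` function
of each sub-script is pointed to it.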
---
 python/batch_utils.py | 249 ++++++++++++++++++++++++++++++++++++++----
 python/hbsm_submit.py |   8 +-
 2 files changed, 232 insertions(+), 25 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index 5530459..145f588 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -29,10 +29,10 @@ Jul 2016
 import glob
 import os
 # import re
-# import collections
+import collections
 
 from VLQAnalysis import utils
-from VLQAnalysis.sample_catalogues import HbsmSampleCatalogue, LocalDiskInterface, RucioEosCernInterface
+from VLQAnalysis.sample_catalogues import HbsmSampleCatalogue, SampleCatalogue, LocalDiskInterface, RucioEosCernInterface
 
 #___________________________________________________________
 
@@ -56,18 +56,21 @@ class Job(object):
     """A job with a script file and an expected output.
     """
     keyword_output_file = 'outputFile='
+    keyword_input_line = 'inputFile='
     keyword_run_function = 'function run() {'
     keyword_main_function = 'function main() {'
     keyword_exe_line = 'VLQAnalysis'
-    def __init__(self, script_path):
+    def __init__(self, script_path, nickname=None):
         self.script_path = script_path
         self._expected_output_file = None
         self._number_input_files = None
+        self._nickname = nickname # to get reasonable names for split jobs; otherwise = basename(script_path)
 
     @property
-    def short_batch_file(self):
+    def nickname(self):
         "use to name the status and log files"
-        return utils.filename_without_extension(self.script_path) # w/out ext, =sample name
+        return (self._nickname if self._nickname else
+                utils.filename_without_extension(self.script_path)) # w/out ext, =sample name
     @classmethod
     def parse_expected_output_file(cls, file_path):
         cmd = "grep %s %s" % (Job.keyword_output_file, file_path)
@@ -77,6 +80,13 @@ class Job(object):
         output_file = output_file.strip()
         return output_file
 
+    @classmethod
+    def parse_input_file(cls, file_path):
+        cmd = "grep %s %s" % (Job.keyword_input_line, file_path)
+        out = utils.get_command_output(cmd)
+        input_file = out['stdout'].split('=')[1].strip().split()[0]
+        return input_file
+
     @classmethod
     def parse_run_cmd_line(cls, file_path):
         run_function = JobSet.extract_function(file_path, Job.keyword_run_function)
@@ -85,12 +95,26 @@ class Job(object):
                             )
         return run_line
 
+    @classmethod
+    def has_multiple_configuration_lines(cls, input_filelist):
+        number_of_config_blocks = 0
+        in_config_block = False
+        keyword = SampleCatalogue.keyword_job_option
+        for line in SampleCatalogue.read_lines_from_txt(input_filelist,
+                                                        keywords_useful_comment_line=[keyword]):
+            if in_config_block and keyword in line:
+                continue
+            elif keyword in line:
+                in_config_block = True
+                number_of_config_blocks += 1
+            else:
+                in_config_block = False
+
+        return number_of_config_blocks>1
+
     @property
     def expected_output_file(self):
         if not self._expected_output_file:
-            cmd = 'grep "%s" %s' % (Job.keyword_output_file, self.script_path)
-            out = utils.get_command_output(cmd)
-            tokens = out['stdout'].strip().replace('=', ' ').split()
             self._expected_output_file = self.parse_expected_output_file(file_path=self.script_path)
             if not self._expected_output_file:
                 raise RuntimeError("cannot extract output file from %s" % self.script_path)
@@ -223,14 +247,125 @@ class JobSet(object):
         return '\n'.join(function_lines)
 
     @property
-    def short_batch_file(self):
+    def nickname(self):
         "use to name the status and log files"
         return utils.filename_without_extension(self.script_path) # w/out ext
 
 #___________________________________________________________
 
+class JobSplit(object):
+    """A job split in several subjobs.
+
+    A JobSplit can be built either by splitting an existing job or
+    from several already-split scripts.
+
+    The input/log/output/status files will be stored in the 'nnn/mmm'
+    subdirectories of each directory used by JobManager, where nnn is
+    a counter of the JobSplit for this session (i.e. JobManager
+    label) and mmm is the index of the children job.
+    """
+    existing_jobsplit_counter = 0 # used to keep track of nnn
+    def __init__(self, base_job=None, children_jobs=[], job_manager=None, max_n_input_files=10, verbose=False):
+        need_to_write_children_scripts = len(children_jobs)==0
+        if not need_to_write_children_scripts:
+            self.parent_job = base_job
+            self.children_jobs = children_jobs
+        else:
+            self.parent_job = base_job
+            self.children_jobs = []
+            nnn = "%03d" % JobSplit.existing_jobsplit_counter
+            input_filelist = Job.parse_input_file(file_path=base_job.script_path)
+            filelist_dir = os.path.dirname(input_filelist)
+            dirs_to_modify = [job_manager.relative_input_directory,
+                              job_manager.relative_log_directory,
+                              job_manager.relative_output_directory,
+                              job_manager.relative_status_directory]
+            script_dir = os.path.dirname(base_job.script_path)
+            script_content = open(base_job.script_path).read()
+            run_function = JobSet.extract_function(base_job.script_path, Job.keyword_run_function)
+            sub_filelists = JobSplit.split_filelist(orig_filelist=input_filelist,
+                                                    dest_subdir=os.path.join(filelist_dir, nnn),
+                                                    max_n_input_files=max_n_input_files)
+            for iJob, sub_filelist in enumerate(sub_filelists):
+                mmm = "%03d" % iJob
+                job_manager.create_directories(subdir=nnn+'/'+mmm, verbose=False)
+                script_dest = utils.mkdir_if_needed(script_dir+'/'+nnn)+'/'+mmm+'.sh'
+                new_run_function = run_function.replace(input_filelist, sub_filelist)
+                for dtm in dirs_to_modify:
+                    new_run_function = new_run_function.replace(dtm, dtm+'/'+nnn+'/'+mmm)
+                with open(script_dest, 'w') as of:
+                    of.write(script_content.replace(run_function, new_run_function))
+                os.chmod(self.script_dest, 0755)
+                self.children_jobs.append(Job(script_dest, nickname=nnn+'_'+mmm))
+            if verbose:
+                print "split job in %d subjobs (subdir '%s') %s" % (len(self.children_jobs), nnn, self.parent_job.script_path)
+            JobSplit.existing_jobsplit_counter += 1
+
+    @classmethod
+    def split_filelist(cls, orig_filelist=None, dest_subdir=None, max_n_input_files=10):
+        "take a filelist, split it and return a list of smaller ones that have at most N input files"
+        if not (os.path.exists(orig_filelist) and orig_filelist.endswith('.txt')):
+            raise NotImplementedError("This does not look like a txt filelist: %s" % orig_filelist)
+        input_lines = []
+        with open(orig_filelist) as 
input_file:
+            input_lines = [l.strip() for l in input_file.readlines()]
+        if Job.has_multiple_configuration_lines(orig_filelist):
+            raise NotImplementedError("Cannot split a filelist containing multiple '# config:' blocks:"
+                                      "\n %s; split it." % orig_filelist)
+        utils.mkdir_if_needed(dest_subdir)
+        sub_job_counter = 0
+        sub_job_lines = []
+        comment_lines = [l for l in input_lines if l.startswith('#')] # these are propagated to each subfile
+        other_lines = [l for l in input_lines if not l.startswith('#')]
+        sub_filelists = []
+        for line in other_lines:
+            sub_job_lines.append(line)
+            if len(sub_job_lines) >= max_n_input_files:
+                sub_job_filelist = "%s/%03d.txt" % (dest_subdir, sub_job_counter)
+                with open(sub_job_filelist, 'w') as of:
+                    of.write('\n'.join(comment_lines + sub_job_lines + ['']))
+                sub_job_counter += 1
+                sub_job_lines = []
+                sub_filelists.append(sub_job_filelist)
+        if len(sub_job_lines):
+            sub_job_filelist = "%s/%03d.txt" % (dest_subdir, sub_job_counter)
+            with open(sub_job_filelist, 'w') as of:
+                of.write('\n'.join(comment_lines + sub_job_lines + ['']))
+            sub_job_counter += 1
+            sub_filelists.append(sub_job_filelist)
+        return sub_filelists
+
+
+    # TODO
+    # @property
+    # def expected_output_files(self):
+    #     "No need to cache here: the constructor should extract from the script file (which is always there)"
+    #     if not self._expected_output_files:
+    #         raise RuntimeError("something went wrong when parsing %s?" % self.script_path)
+    #     return self._expected_output_files
+#___________________________________________________________
+
+class SplitMap(object):
+    """Used to keep track of how jobs are split
+
+    Just a list of lines, where in each line the first word is the
+    master script_path, and the following ones are the children script
+    paths. Mainly useful for __str__.
+    """
+    def __init__(self, splitjobs=[]):
+        self.splitjobs = splitjobs
+    def __str__(self):
+        return '\n'.join(' '.join([jm.parent_job.script_path]+
+                                  [jc.script_path for jc in jm.children_jobs])
+                         for jm in self.splitjobs)
+
+#___________________________________________________________
+
 class JobManager(object):
-    "Manage a set of jobs; all inputs/outputs will be under batch/<jobset_label>"
+    """Manage a set of jobs; all inputs/outputs will be under batch/<jobset_label>
+
+    A split_map is used to keep track of what job goes into which subjobs.
+ """ def __init__(self, jobset_label, verbose=False, debug=False, overwrite_batch_scripts=False): self.jobset = jobset_label self.queues = [] @@ -249,15 +384,68 @@ class JobManager(object): self.create_directories() existing_scripts = glob.glob(self.relative_input_directory+'/*.sh') if existing_scripts and not overwrite_batch_scripts: - using_merged_scripts = all('jobset' in f for f in existing_scripts) using_sample_scripts = all('jobset' not in f for f in existing_scripts) + using_merged_scripts = all('jobset' in f for f in existing_scripts) + using_split_scripts = os.path.exists(self.split_map_file_path) if using_merged_scripts==using_sample_scripts: raise NotImplementedError("Cannot handle a mix of merged/unmerged scripts from %s" % self.relative_input_directory) self.jobs = [JobSet(script_path=f, verbose=self.verbose) if using_merged_scripts else Job(script_path=f) for f in sorted(existing_scripts)] + if using_split_scripts: + self.read_split_map() elif verbose: print "JobManager: you now need to loop over samples/variations and create the jobs" + + def __del__(self): + self.save_split_map() + + @property + def split_map_file_path(self): + return self.relative_status_directory+'/split_map.txt' + + def save_split_map(self): + splitjobs = self.get_split_jobs() + if splitjobs: + split_map = SplitMap(splitjobs=splitjobs) + with open(self.relative_status_directory+'/split_map.txt', 'w') as of: + of.write(str(split_map)) + if self.debug: + print "saved split map in %s" % self.relative_status_directory + + def read_split_map(self): + "Read the txt file, and replace the master Job with its corresponding JobSplit" + split_lines = [] + map_path = self.split_map_file_path + with open(map_path, 'r') as input_file: + split_lines = [l.strip() for l in input_file.readlines() if l.strip()] + if not split_lines: + return + master_counts = collections.Counter([l.split()[0] for l in split_lines]) + most_common = master_counts.most_common() + max_label, max_occurrences = most_common[0] + min_label, min_occurrences = most_common[-1] + if max_occurrences!=1 or min_occurrences!=1: + raise NotImplementedError("The split map %s is invalid; %d duplicates '%s'" % (map_path, max_occurrences, max_label)) + nnn = 0 + for line in split_lines: + tokens = line.split() + if len(tokens)<len(['master', 'child0', 'child1']): + raise NotImplementedError("Cannot parse split map line: too few elements.\n > %s"%line) + master_script = tokens[0] + children_scripts = tokens[1:] + iJob = next(i for i, job in enumerate(self.jobs) if type(job)==Job and job.script_path==master_script) + self.jobs[iJob] = JobSplit(base_job=self.jobs[iJob], + children_jobs=[Job(script_path=cs, nickname="%03d_%03d"%(nnn, mmm)) + for mmm, cs in enumerate(children_scripts)], + verbose=False) + nnn += 1 + if self.debug: + print "read %d split jobs from %s" % self.relative_status_directory + + def get_split_jobs(self): # just because 'split_jobs' is already a member function + return [j for j in self.jobs if type(j) is JobSplit] + @property def needs_to_generate_scripts(self): return self.overwrite_batch_scripts or not self.jobs @@ -311,6 +499,12 @@ class JobManager(object): print "created batch file %s" % batch_filename self.jobs.append(Job(batch_filename)) + def split_jobs(self, split_every_n=10): + "replace large jobs with jobsplits" + for iJob, job in enumerate(self.jobs): + if job.number_input_files>split_every_n: + self.jobs[iJob] = JobSplit(base_job=job, job_manager=self, max_n_input_files=split_every_n, verbose=self.verbose) + def 
merge_jobs(self, min_n_input_files=10): "replace jobs with merged jobsets" jobs = [] @@ -332,17 +526,26 @@ class JobManager(object): self.jobs = jobsets def submit_jobs(self): + submission_commands = [] for job in self.jobs: - cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, - base_dir=self.absolute_base_dir, job=job) + if type(job) is JobSplit: + for subjob in job.children_jobs: + cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, + base_dir=self.absolute_base_dir, job=subjob) + submission_commands.append(cmd) + else: + cmd = self.job_submission_command(queue=self.queue, verbose=self.verbose, + base_dir=self.absolute_base_dir, job=job) + submission_commands.append(cmd) + for cmd in submission_commands: if self.verbose: print cmd if not self.dry_run: out = utils.get_command_output(cmd) - status_path = os.path.join(self.relative_status_directory, job.short_batch_file+'.submitted') - with open(status_path, 'w') as status_file: - status_file.write('stdout:\n'+out['stdout']+ - 'stderr:\n'+ out['stderr']) + # status_path = os.path.join(self.relative_status_directory, job.nickname+'.submitted') + # with open(status_path, 'w') as status_file: + # status_file.write('stdout:\n'+out['stdout']+ + # 'stderr:\n'+ out['stderr']) if self.dry_run: print "This was a dry run. To actually submit the jobs run with '--submit'" def check_outputs(self): @@ -397,10 +600,10 @@ class JobManager(object): @property def relative_status_directory(self): return 'batch/'+self.jobset+'/status' - def create_directories(self): + def create_directories(self, subdir='', verbose=False): for d in [self.relative_input_directory, self.relative_log_directory, self.relative_output_directory, self.relative_status_directory]: - dir_path = utils.mkdir_if_needed(d, verbose=self.verbose) + dir_path = utils.mkdir_if_needed(d+(('/'+subdir) if subdir else ''), verbose=verbose) def job_label(self, sample_name=None, variation_name=None): "The label used to distinguish one job from another." 
job_label = sample_name+'_'+variation_name
@@ -472,9 +675,9 @@ class LxbJobManager(JobManager):
                +" -L /bin/bash " # reset shell
                +" -q %s " % queue
                # not sure this is working
-               # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, short_batch_file)
-               +" -J %s " % job.short_batch_file
-               +" -o %s.oe " % (self.relative_log_directory+'/'+job.short_batch_file)
+               # +" -o %s/tthf-trex-utils/batch/log/%s_%s.oe" % (base_dir, opts.label, nickname)
+               +" -J %s " % job.nickname
+               +" -o %s.oe " % (self.relative_log_directory+'/'+job.nickname)
                +" %s" % os.path.join(base_dir, job.script_path)
                )
         return cmd
@@ -496,7 +699,7 @@ class At3JobManager(JobManager):
     def job_submission_command(self, queue=None, verbose=None, base_dir=None, job=None):
         cmd = (" qsub "
                +" -j oe " # join stdout and stderr
-               +" -o %s.oe " % (self.relative_log_directory+'/'+job.short_batch_file)
+               +" -o %s.oe " % (self.relative_log_directory+'/'+job.nickname)
                +" -q %s " % queue
                +" %s" % os.path.join(base_dir, job.script_path)
                )
diff --git a/python/hbsm_submit.py b/python/hbsm_submit.py
index 44a8cfd..8490e54 100755
--- a/python/hbsm_submit.py
+++ b/python/hbsm_submit.py
@@ -80,6 +80,7 @@ def main():
     parser.add_option('--input-from', default='rucioeos',
                       help='Where the ntuples are stored; see sample_catalogues.InputDataInterface')
     parser.add_option('--merge-fewer', default=1, type=int, help='merge jobs if less than N input files')
+    parser.add_option('--split-larger', default=500, type=int, help='split jobs if more than N input files (default %default)')
     parser.add_option('--print-local', action='store_true', help='print the command to run locally')
     parser.add_option('-v', '--verbose', action='store_true', help='print what it is doing')
     parser.add_option('-d', '--debug', action='store_true', help='print even more debugging information')
@@ -138,13 +139,16 @@ def main():
                            sample_catalogues.RucioPnfsIfaeInterface() if opts.input_from=='ruciopnfs' else
                            sample_catalogues.At3ScratchDiskInterface() if opts.input_from=='at3disk' else
                            sample_catalogues.LocalDiskInterface('VLQAnalysis/data/filelist/', base_input_dir=opts.input_from))
+        input_interface.attach_filelists(samples=samples_to_process, verbose=opts.verbose)
 
-        sample_catalogue.add_filelists(samples=samples_to_process, input_interface=input_interface)
+        # samples_to_process = sample_catalogue.add_filelists(samples=samples_to_process, input_interface=input_interface)
 
         for sample in samples_to_process:
             for systematic in sample.systematic_uncertainties:
-                for variation in [v for v in systematic.variations if v.name=='nominal']:
+                for variation in systematic.variations:
                     job_manager.create_job(sample, systematic, variation)
+        if opts.split_larger>2:
+            job_manager.split_jobs(split_every_n=opts.split_larger)
         if opts.merge_fewer>1:
             job_manager.merge_jobs(min_n_input_files=opts.merge_fewer)
     else:
-- 
GitLab


From e2ffbc5cadf69d5ac10cfad837586d9d8a5647aa Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Mon, 7 Nov 2016 15:03:36 +0100
Subject: [PATCH 34/35] also check JobSplit

Details:
- implement JobSplit.expected_output_files
- when checking, if verbose, print the list of missing files
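The per-job bookkeeping is the same for plain and split jobs: collect
the expected output files and compare them against the disk.
Schematically (hypothetical paths, not from a real session):

    import os
    expected = ['batch/test/output/a.root', 'batch/test/output/b.root']
    done = [f for f in expected if os.path.exists(f)]
    miss = [f for f in expected if f not in done]
    all_done = not miss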
---
 python/batch_utils.py | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/python/batch_utils.py b/python/batch_utils.py
index 145f588..52035d4 100644
--- a/python/batch_utils.py
+++ b/python/batch_utils.py
@@ -267,6 +267,8 @@ class JobSplit(object):
     def __init__(self, base_job=None, children_jobs=[], job_manager=None, max_n_input_files=10, verbose=False):
         need_to_write_children_scripts = len(children_jobs)==0
+        self._expected_output_file = None
+        self.requires_merging = True
         if not need_to_write_children_scripts:
             self.parent_job = base_job
             self.children_jobs = children_jobs
@@ -295,7 +297,7 @@ class JobSplit(object):
                 new_run_function = new_run_function.replace(dtm, dtm+'/'+nnn+'/'+mmm)
                 with open(script_dest, 'w') as of:
                     of.write(script_content.replace(run_function, new_run_function))
-                os.chmod(self.script_dest, 0755)
+                os.chmod(script_dest, 0755)
                 self.children_jobs.append(Job(script_dest, nickname=nnn+'_'+mmm))
             if verbose:
                 print "split job in %d subjobs (subdir '%s') %s" % (len(self.children_jobs), nnn, self.parent_job.script_path)
@@ -335,6 +337,22 @@ class JobSplit(object):
             sub_filelists.append(sub_job_filelist)
         return sub_filelists
 
+    @property
+    def expected_output_file(self):
+        "TODO remove duplication with Job"
+        if not self._expected_output_file:
+            self._expected_output_file = Job.parse_expected_output_file(file_path=self.parent_job.script_path)
+            if not self._expected_output_file:
+                raise RuntimeError("cannot extract output file from %s" % self.parent_job.script_path)
+        return self._expected_output_file
+
+    @property
+    def expected_output_files(self):
+        if self.requires_merging:
+            return [j.expected_output_file for j in self.children_jobs]
+        else:
+            return [self.expected_output_file]
+
 
     # TODO
     # @property
@@ -552,20 +570,22 @@ class JobManager(object):
         counter_job_any = 0
         counter_job_done = 0
         counter_job_miss = 0
-        counter_files_any = 0
-        counter_files_done = 0
+        files_done = []
+        files_miss = []
         for job in self.jobs:
             out_filenames = job.expected_output_files
-            expect = len(out_filenames)
-            done = sum(1 for f in out_filenames if os.path.exists(f))
-            miss = expect - done
-            counter_files_any += expect
-            counter_files_done += done
+            done = [f for f in out_filenames if os.path.exists(f)]
+            miss = [f for f in out_filenames if f not in done]
+            files_miss.extend(miss)
+            files_done.extend(done)
+            all_done = len(miss)==0
             counter_job_any += 1
-            counter_job_done += (1 if expect==done else 0)
-            counter_job_miss += (0 if expect==done else 1)
+            counter_job_done += (1 if all_done else 0)
+            counter_job_miss += (0 if all_done else 1)
         print "Checked %d jobs: %d done, %d missing" % (counter_job_any, counter_job_done, counter_job_miss)
-        print "\t (%d/%d output files)" % (counter_files_done, counter_files_any)
+        print "\t (%d/%d output files)" % (len(files_done), len(files_done)+len(files_miss))
+        if self.verbose:
+            print 'Missing files:\n'+'\n'.join(files_miss)
 
     def resubmit_failed_jobs(self):
         """For merged jobs, now resubmitting the whole script (even when partially done)
-- 
GitLab


From 479605604cdf9e69ec98876b97a2702f92797a25 Mon Sep 17 00:00:00 2001
From: gerbaudo <davide.gerbaudo@gmail.com>
Date: Tue, 8 Nov 2016 17:28:14 +0100
Subject: [PATCH 35/35] check also JobSplit
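The completion test is shared between the different job types; a
sketch of the idea (simplified, assuming only the two attribute
variants used here):

    import os

    def is_completed(job):
        # Job exposes a single expected_output_file; JobSet/JobSplit expose a list
        names = (job.expected_output_files if hasattr(job, 'expected_output_files')
                 else [job.expected_output_file])
        return all(os.path.exists(f) for f in names)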
for JobSet" + out_filenames = job.expected_output_files + expect = len(out_filenames) + done = sum(1 for f in out_filenames if os.path.exists(f)) + return expect==done #___________________________________________________________ @@ -597,12 +604,13 @@ class JobManager(object): self.check_outputs() unfinished_jobs = [] for job in self.jobs: - out_filenames = job.expected_output_files - expect = len(out_filenames) - done = sum(1 for f in out_filenames if os.path.exists(f)) - if done < expect: + if type(job) is JobSplit: + unfinished_subjobs = [cj for cj in job.children_jobs if not Job.is_completed(cj)] + unfinished_jobs.extend(unfinished_jobs) + elif not Job.is_completed(job): unfinished_jobs.append(job) self.jobs = unfinished_jobs + # note to self: this will not leave any partial JobSplit in the SplitMap, avoiding that it gets corrupted if self.verbose: print "about to resubmit %d failed jobs" % len(self.jobs) self.submit_jobs() -- GitLab