From 068680e027cfb082cbee78a1b4dbbb8e1bd3761b Mon Sep 17 00:00:00 2001 From: Albert Puig <albert.puig@cern.ch> Date: Wed, 11 Jul 2018 11:07:14 +0200 Subject: [PATCH 1/2] Add formulate to data loading to speed up things. --- analysis/data/loaders.py | 10 +++++++--- setup.py | 3 ++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/analysis/data/loaders.py b/analysis/data/loaders.py index 0e9b1ca..f561249 100644 --- a/analysis/data/loaders.py +++ b/analysis/data/loaders.py @@ -17,6 +17,7 @@ import ROOT import numpy as np import pandas as pd from root_pandas import read_root +import formulate from analysis.data.converters import dataset_from_pandas from analysis.utils.logging_color import get_logger @@ -467,7 +468,8 @@ def get_root_from_root_file(file_name, tree_name, kwargs): leave_set = ROOT.RooArgSet() leave_list = [] if selection: - for var in leaves: + selection_expr = formulate.from_root(selection) + for var in selection_expr.variables: leave_list.append(ROOT.RooRealVar(var, var, 0.0)) leave_set.add(leave_list[-1]) name = ''.join(random.SystemRandom().choice(string.ascii_letters + string.digits) @@ -562,9 +564,11 @@ def get_pandas_from_root_file(file_name, tree_name, kwargs): if not os.path.exists(file_name): raise OSError("Cannot find input file -> {}".format(file_name)) selection = kwargs.get('selection') - variables = kwargs.get('variables') + variables = kwargs.get('variables', []) if selection: - output_data = read_root(file_name, tree_name).query(selection) + selection_expr = formulate.from_numexpr(selection) + full_variables = variables + list(selection_expr.variables) + output_data = read_root(file_name, tree_name, columns=full_variables).query(selection) if variables: output_data = output_data[variables] else: diff --git a/setup.py b/setup.py index aed8952..0bb0fe8 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,8 @@ setup(name='analysis', 'scipy', 'psutil', 'matplotlib', - 'seaborn'], + 'seaborn', + 'formulate'], packages=['analysis'], 
data_files=['LICENSE', 'README.md'], zip_safe=False) -- GitLab From 40096a62bfc5636c0a64f6904c011d264d28c0b9 Mon Sep 17 00:00:00 2001 From: Albert Puig <albert.puig@cern.ch> Date: Wed, 11 Jul 2018 11:35:21 +0200 Subject: [PATCH 2/2] Fix bug: also add `variables` to the leaves built from the selection. --- analysis/data/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/data/loaders.py b/analysis/data/loaders.py index f561249..cc850a6 100644 --- a/analysis/data/loaders.py +++ b/analysis/data/loaders.py @@ -469,7 +469,7 @@ def get_root_from_root_file(file_name, tree_name, kwargs): leave_list = [] if selection: selection_expr = formulate.from_root(selection) - for var in selection_expr.variables: + for var in selection_expr.variables.union(variables): leave_list.append(ROOT.RooRealVar(var, var, 0.0)) leave_set.add(leave_list[-1]) name = ''.join(random.SystemRandom().choice(string.ascii_letters + string.digits) -- GitLab