From eb6684a5fdc749d542924b2435d687d71f216d39 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:17:11 +0100
Subject: [PATCH 01/18] add dataframe extension modules

---
 quickstats/extensions/__init__.py            |   3 +
 quickstats/extensions/extension_dataframe.py | 102 +++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 quickstats/extensions/__init__.py
 create mode 100644 quickstats/extensions/extension_dataframe.py

diff --git a/quickstats/extensions/__init__.py b/quickstats/extensions/__init__.py
new file mode 100644
index 00000000..cb69ec0c
--- /dev/null
+++ b/quickstats/extensions/__init__.py
@@ -0,0 +1,3 @@
+__all__ = ["ExtensionDataFrame"]
+
+from .extension_dataframe import ExtensionDataFrame
\ No newline at end of file
diff --git a/quickstats/extensions/extension_dataframe.py b/quickstats/extensions/extension_dataframe.py
new file mode 100644
index 00000000..58d6c8b8
--- /dev/null
+++ b/quickstats/extensions/extension_dataframe.py
@@ -0,0 +1,102 @@
+from quickstats import AbstractObject
+
+class ExtensionDataFrame(AbstractObject):
+
+    @property
+    def dataframe(self):
+        return self._df
+        
+    def __init__(self, df:"pandas.DataFrame", verbosity:str="INFO"):
+        super().__init__(verbosity=verbosity)
+        self._df = df
+
+    def __repr__(self):
+        return self.dataframe.__repr__()
+
+    def _repr_html_(self):
+        return self.dataframe._repr_html_()
+
+    def _parse_argument(self, ops_name:str, index=None, columns=None, axis=None, **kwargs):
+        """Resolve a (mapper, axis) or (index, columns) style call into an (index, columns) pair."""
+        assert len(kwargs) == 1  # exactly one mapper-like keyword (e.g. mapper=...) is expected
+        argname, argval = next(iter(kwargs.items()))
+        if (argval is None) and (index is None) and (columns is None):
+            raise TypeError(f"must pass an index to {ops_name}")
+        if (index is not None) or (columns is not None):
+            if axis is not None:
+                raise TypeError("cannot specify both 'axis' and any of 'index' or 'columns'")
+            if argval is not None:
+                raise TypeError(f"cannot specify both '{argname}' and any of 'index' or 'columns'")
+        else:
+            if axis and self._df._get_axis_number(axis) == 1:  # frame is stored in self._df; self.df is not defined in this class
+                columns = argval
+            else:
+                index = argval
+        return index, columns
+                
+        
+    def select_values(self, mapper=None, index=None, columns=None, axis=None, copy:bool=True, inplace:bool=False, invert:bool=False):
+        """
+        Keep (or, with `invert`, drop) the rows matched by {name: selection} mappings on index levels / columns.
+        A selection is a scalar (equality), a list/tuple (membership) or a callable applied per element;
+        a tuple key pairs with a tuple of selections whose masks are AND-ed together.
+        Returns the filtered dataframe, or None when `inplace` (the wrapped dataframe is replaced).
+        """
+        index, columns = self._parse_argument('select', index=index, columns=columns,
+                                              axis=axis, mapper=mapper)
+        # does not consider copy on write yet
+        result = self._df if inplace else self._df.copy(deep=copy)
+        index_ops = lambda x: result.index.get_level_values(x)
+        column_ops = lambda x: result[x]
+        for ops, maps in [(index_ops, index), (column_ops, columns)]:
+            if maps is None:
+                continue
+            for attribs, selections in maps.items():
+                if not isinstance(attribs, tuple):
+                    attribs, selections = (attribs,), (selections,)
+                masks = None
+                for i, attrib in enumerate(attribs):
+                    selection = selections[i]
+                    if not callable(selection):
+                        if isinstance(selection, (tuple, list)):
+                            mask = ops(attrib).isin(selection)
+                        else:
+                            mask = ops(attrib) == selection
+                    elif attrib is not None:
+                        # pandas.Index has no .apply(); .map() exists on both Index and Series
+                        mask = ops(attrib).map(selection).to_numpy(dtype=bool)
+                    else:
+                        mask = result.apply(selection)
+                    masks = mask if masks is None else (masks & mask)
+                result = result[~masks if invert else masks]
+        if inplace:
+            self._df = result
+            return None
+        return result
+
+    def reject_values(self, mapper=None, index=None, columns=None, axis=None, copy:bool=True, inplace:bool=False):
+        return self.select_values(mapper=mapper, index=index,
+                                  columns=columns, axis=axis,
+                                  copy=copy, inplace=inplace,
+                                  invert=True)
+
+    def concat(self, other, copy:bool=True, inplace:bool=False, order:str="first", **kwargs):
+        import pandas as pd
+        result = self._df if inplace else self._df.copy(deep=copy)
+        if order == "first":
+            if isinstance(other, list):
+                objs = [result] + other
+            else:
+                objs = [result, other]
+        elif order == "last":
+            if isinstance(other, list):
+                objs = other + [result]
+            else:
+                objs = [other, result]
+        else:
+            raise TypeError('order must be either "first" or "last"')
+        result = pd.concat(objs, **kwargs)
+        if inplace:
+            self._df = result
+            return None
+        return result
\ No newline at end of file
-- 
GitLab


From ba54eb0cdded5c903f632190ce204a273b669e3b Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:23:09 +0100
Subject: [PATCH 02/18] restructure resource files

---
 quickstats/__init__.py                                          | 2 +-
 .../mpl_stylesheets}/quick_default.mplstyle                     | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename quickstats/{stylesheets => resources/mpl_stylesheets}/quick_default.mplstyle (100%)

diff --git a/quickstats/__init__.py b/quickstats/__init__.py
index 3f9af632..ac476c09 100644
--- a/quickstats/__init__.py
+++ b/quickstats/__init__.py
@@ -16,8 +16,8 @@ os.environ['CPPYY_API_PATH'] = "none"
 
 module_path = pathlib.Path(__file__).parent.absolute()
 macro_path = os.path.join(module_path, 'macros')
-stylesheet_path = os.path.join(module_path, 'stylesheets')
 resource_path = os.path.join(module_path, 'resources')
+stylesheet_path = os.path.join(resource_path, 'mpl_stylesheets')
 
 # ROOT.gInterpreter.AddIncludePath(os.path.join(macro_path, "macros"))
 
diff --git a/quickstats/stylesheets/quick_default.mplstyle b/quickstats/resources/mpl_stylesheets/quick_default.mplstyle
similarity index 100%
rename from quickstats/stylesheets/quick_default.mplstyle
rename to quickstats/resources/mpl_stylesheets/quick_default.mplstyle
-- 
GitLab


From a031232961ec67185f8a25ea35d2468392a2bc22 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:25:08 +0100
Subject: [PATCH 03/18] new matplotlib stylesheets and rename default
 stylesheet

---
 quickstats/plots/__init__.py                          |  2 +-
 .../{quick_default.mplstyle => hep.mplstyle}          |  0
 .../resources/mpl_stylesheets/no_latex.mplstyle       |  0
 quickstats/resources/mpl_stylesheets/science.mplstyle | 11 +++++++++++
 4 files changed, 12 insertions(+), 1 deletion(-)
 rename quickstats/resources/mpl_stylesheets/{quick_default.mplstyle => hep.mplstyle} (100%)
 create mode 100644 quickstats/resources/mpl_stylesheets/no_latex.mplstyle
 create mode 100644 quickstats/resources/mpl_stylesheets/science.mplstyle

diff --git a/quickstats/plots/__init__.py b/quickstats/plots/__init__.py
index b55b4a6c..8827c86c 100644
--- a/quickstats/plots/__init__.py
+++ b/quickstats/plots/__init__.py
@@ -29,7 +29,7 @@ from matplotlib import style, colors
 
 style.core.USER_LIBRARY_PATHS.append(quickstats.stylesheet_path)
 style.core.reload_library()
-style.use("quick_default")
+style.use("hep")
 
 register_colors(EXTRA_COLORS)
 register_cmaps(QUICKSTATS_PALETTES)
\ No newline at end of file
diff --git a/quickstats/resources/mpl_stylesheets/quick_default.mplstyle b/quickstats/resources/mpl_stylesheets/hep.mplstyle
similarity index 100%
rename from quickstats/resources/mpl_stylesheets/quick_default.mplstyle
rename to quickstats/resources/mpl_stylesheets/hep.mplstyle
diff --git a/quickstats/resources/mpl_stylesheets/no_latex.mplstyle b/quickstats/resources/mpl_stylesheets/no_latex.mplstyle
new file mode 100644
index 00000000..e69de29b
diff --git a/quickstats/resources/mpl_stylesheets/science.mplstyle b/quickstats/resources/mpl_stylesheets/science.mplstyle
new file mode 100644
index 00000000..45540391
--- /dev/null
+++ b/quickstats/resources/mpl_stylesheets/science.mplstyle
@@ -0,0 +1,11 @@
+text.usetex: False
+mathtext.fontset : dejavuserif
+mathtext.default: it
+
+font.family : serif
+font.serif: DejaVu Serif, Times, Times Roman, Times New Roman, Nimbus Roman, FreeSerif
+font.monospace: Tex Gyre Cursor, Courier, Courier New, Nimbus Mono, FreeMono
+
+legend.frameon: False
+legend.framealpha: 0.75
+legend.fancybox: False
\ No newline at end of file
-- 
GitLab


From 9165ecd7a77253a732490f3a95e26e354b464ac4 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:28:35 +0100
Subject: [PATCH 04/18] add decorator for checking module requirements

---
 quickstats/core/decorators.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/quickstats/core/decorators.py b/quickstats/core/decorators.py
index 95c9d700..0e4b96a7 100644
--- a/quickstats/core/decorators.py
+++ b/quickstats/core/decorators.py
@@ -1,5 +1,7 @@
+from typing import List
 from functools import partial
 import time
+import importlib
 
 class semistaticmethod(object):
     """
@@ -35,6 +37,18 @@ class semistaticmethod(object):
     def __func__(self):
         return self.f
 
+def require_package(pkg_names:List[str]):
+    pkg_names = [pkg_names] if isinstance(pkg_names, str) else list(pkg_names)  # a bare str would otherwise be checked character by character
+    def check_package(func):
+        def wrapper(*args, **kwargs):
+            from importlib.util import find_spec  # plain `import importlib` does not guarantee the util submodule is loaded
+            missing_pkgs = [name for name in pkg_names if find_spec(name) is None]
+            if missing_pkgs:
+                raise RuntimeError(f'missing modules required by the function {func.__name__}: {", ".join(missing_pkgs)}')
+            return func(*args, **kwargs)  # forward to the decorated callable once all requirements are importable
+        return wrapper
+    return check_package
+        
 def cls_method_timer(func):
     """
     Decorator function to measure the execution time of a class method.
-- 
GitLab


From 5d67c449628c43dbaf3014d701884b55a7f86c0f Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:35:19 +0100
Subject: [PATCH 05/18] make reloading matplotlib stylesheet a subroutine

---
 quickstats/plots/__init__.py |  6 ++----
 quickstats/plots/core.py     | 11 ++++++++++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/quickstats/plots/__init__.py b/quickstats/plots/__init__.py
index 8827c86c..722d53a1 100644
--- a/quickstats/plots/__init__.py
+++ b/quickstats/plots/__init__.py
@@ -26,10 +26,8 @@ from .bidirectional_bar_chart import BidirectionalBarChart
 from matplotlib import style, colors
 
 # Reference from https://github.com/beojan/atlas-mpl
-
-style.core.USER_LIBRARY_PATHS.append(quickstats.stylesheet_path)
-style.core.reload_library()
-style.use("hep")
+reload_styles()
+use_style('hep')
 
 register_colors(EXTRA_COLORS)
 register_cmaps(QUICKSTATS_PALETTES)
\ No newline at end of file
diff --git a/quickstats/plots/core.py b/quickstats/plots/core.py
index 5c4bae8c..2c1eaec6 100644
--- a/quickstats/plots/core.py
+++ b/quickstats/plots/core.py
@@ -3,6 +3,7 @@ from cycler import cycler
 
 import numpy as np
 
+import quickstats
 from quickstats import DescriptiveEnum
 from quickstats.utils.common_utils import combine_dict
 
@@ -144,4 +145,12 @@ def get_color_cycle(source:Optional[Union[List, str, "ListedColorMap"]]="default
         colors = source.colors
         return get_color_cycle(colors)
     return (cycler(color=source))
-    
\ No newline at end of file
+
+def reload_styles():
+    from matplotlib import style
+    style.core.USER_LIBRARY_PATHS.append(quickstats.stylesheet_path)
+    style.core.reload_library()
+
+def use_style(name:str='hep'):  # default follows the stylesheet rename quick_default.mplstyle -> hep.mplstyle
+    from matplotlib import style  # imported lazily, only when a style is actually applied
+    style.use(name)
\ No newline at end of file
-- 
GitLab


From 0b557e0df48eb79e62e8313b4cbb49d130d864e7 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:36:39 +0100
Subject: [PATCH 06/18] allow more flexible text drawing in plots

---
 quickstats/plots/abstract_plot.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/quickstats/plots/abstract_plot.py b/quickstats/plots/abstract_plot.py
index 1d1daab3..6306bbe5 100644
--- a/quickstats/plots/abstract_plot.py
+++ b/quickstats/plots/abstract_plot.py
@@ -1,5 +1,6 @@
 from typing import Optional, Union, Dict, List, Tuple, Callable, Sequence
 from cycler import cycler
+from itertools import cycle
 
 import numpy as np
 import matplotlib
@@ -9,7 +10,7 @@ from quickstats.plots import get_color_cycle, get_cmap
 from quickstats.plots.color_schemes import QUICKSTATS_PALETTES
 from quickstats.plots.template import (single_frame, parse_styles, format_axis_ticks,
                                        parse_analysis_label_options, centralize_axis,
-                                       create_transform)
+                                       create_transform, draw_multiline_text)
 from quickstats.utils.common_utils import combine_dict, insert_periodic_substr
 from quickstats.maths.statistics import bin_center_to_bin_edge, get_hist_comparison_data
 from quickstats.maths.statistics import HistComparisonMode
@@ -87,6 +88,7 @@ class AbstractPlot(AbstractObject):
         if color_cycle is None:
             color_cycle = self.COLOR_CYCLE
         self.cmap = get_cmap(color_cycle)
+        self.color_cycle = cycle(self.cmap.colors)
         
     def get_hep_data(self):
         return combine_dict(self.hep_data)
@@ -212,11 +214,16 @@ class AbstractPlot(AbstractObject):
         if title is not None:
             ax.set_title(title, **self.styles['title'])
 
-    def draw_text(self, ax, text:str, x, y, transform_x:str="axis", transform_y:str="axis", **kwargs):
-        transform = create_transform(transform_x=transform_x,
-                                     transform_y=transform_y)
+    def draw_text(self, ax, text:str, x, y,
+                  dy:float=0.05,
+                  transform_x:str="axis",
+                  transform_y:str="axis",
+                  **kwargs):
         styles = combine_dict(self.styles['text'], kwargs)
-        ax.text(x, y, text, transform=transform, **styles)
+        draw_multiline_text(ax, x, y, text, dy=dy,
+                            transform_x=transform_x,
+                            transform_y=transform_y,
+                            **styles)
 
     def draw_cbar_label(self, cbar, cbarlabel:Optional[str]=None,
                         combined_styles:Optional[Dict]=None):
-- 
GitLab


From e323185ce9af1c79eb75ca6e687f41ba8bb0f131 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:38:12 +0100
Subject: [PATCH 07/18] more organized way to draw errorbands in General1DPlot

---
 quickstats/plots/general_1D_plot.py | 39 +++++++++++++++++++----------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/quickstats/plots/general_1D_plot.py b/quickstats/plots/general_1D_plot.py
index 53807464..c886ed92 100644
--- a/quickstats/plots/general_1D_plot.py
+++ b/quickstats/plots/general_1D_plot.py
@@ -8,13 +8,19 @@ from quickstats.utils.common_utils import combine_dict
 
 class General1DPlot(AbstractPlot):
 
-    CONFIG = {
-        'errorband_plot_styles': {
-             'alpha': 1,
-             'hatch': '/'
+    STYLES = {
+        'fill_between': {
+             'alpha': 0.3,
+             'hatch': None,
+             'linewidth': 1.0,
+             'edgecolor': 'k'
         }
     }
     
+    CONFIG = {
+        'errorband_legend': True
+    }
+    
     def __init__(self, data_map:Union[pd.DataFrame, Dict[str, pd.DataFrame]],
                  label_map:Optional[Dict]=None,
                  styles_map:Optional[Dict]=None,
@@ -58,21 +64,24 @@ class General1DPlot(AbstractPlot):
                          stat_configs:Optional[List[StatPlotConfig]]=None,
                          styles:Optional[Dict]=None,
                          label:Optional[str]=None):
-        x = data[xattrib].values
-        y = data[yattrib].values
+        data = data.reset_index()
+        x, y = data[xattrib].values, data[yattrib].values
         indices = np.argsort(x)
-        x = x[indices]
-        y = y[indices]
+        x, y = x[indices], y[indices]
         draw_styles = combine_dict(self.styles['plot'], styles)
-        
+        fill_styles = combine_dict(self.styles['fill_between'])
+        if ('color' in draw_styles) and ('color' not in fill_styles):
+            fill_styles['color'] = draw_styles['color']
+            
         if (yerrloattrib is not None) and (yerrhiattrib is not None):
             yerrlo = data[yerrloattrib][indices]
             yerrhi = data[yerrhiattrib][indices]
             handle_fill = ax.fill_between(x, yerrlo, yerrhi,
-                                          #label=...,
-                                          **self.config["errorband_plot_styles"])
+                                          **fill_styles)
+        else:
+            handle_fill = None
         
-        handle = ax.plot(x, y, **draw_styles, label=label)
+        handle_plot = ax.plot(x, y, **draw_styles, label=label)
         if stat_configs is not None:
             stat_handles = []
             for stat_config in stat_configs:
@@ -81,7 +90,11 @@ class General1DPlot(AbstractPlot):
                 stat_handles.append(stat_handle)
         else:
             stat_handles = None
-        return handle[0], stat_handles
+        if self.config['errorband_legend'] and (handle_fill is not None):
+            handles = (handle_plot[0], handle_fill)
+        else:
+            handles = handle_plot[0]
+        return handles, stat_handles
     
     def draw(self, xattrib:str, yattrib:str,
              yerrloattrib:Optional[str]=None,
-- 
GitLab


From 93154470425cde84cd6a74d7839433c2d2f88614 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:39:40 +0100
Subject: [PATCH 08/18] ensure correct color cycle in two axis plot

---
 quickstats/plots/two_axis_1D_plot.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/quickstats/plots/two_axis_1D_plot.py b/quickstats/plots/two_axis_1D_plot.py
index db72f946..fae64a7d 100644
--- a/quickstats/plots/two_axis_1D_plot.py
+++ b/quickstats/plots/two_axis_1D_plot.py
@@ -48,6 +48,8 @@ class TwoAxis1DPlot(General1DPlot):
                 for target in targets:
                     data = self.data_map[target]
                     styles = styles_map.get(target, None)
+                    if styles is None:
+                        styles = {}
                     label = label_map.get(target, "")
                     if draw_stats:
                         if target in self.stat_configs:
@@ -58,6 +60,8 @@ class TwoAxis1DPlot(General1DPlot):
                             stat_configs = None
                     else:
                         stat_configs = None
+                    if ('color' not in styles):
+                        styles['color'] = next(self.color_cycle)
                     handle, stat_handles = self.draw_single_data(ax, data, 
                                                                  xattrib=xattrib,
                                                                  yattrib=yattrib,
@@ -96,4 +100,4 @@ class TwoAxis1DPlot(General1DPlot):
         self.set_axis_range(ax1, xmin=xmin, xmax=xmax, ymin=ymin_first, ymax=ymax_first, ypad=ypad_first)
         self.set_axis_range(ax2, xmin=xmin, xmax=xmax, ymin=ymin_second, ymax=ymax_second, ypad=ypad_second)
         
-        return ax1, ax2
+        return ax1, ax2
\ No newline at end of file
-- 
GitLab


From ca2428f6f2e42e31c4e94dd79f2d4242bb54fdc9 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:40:12 +0100
Subject: [PATCH 09/18] bugfix logy for second axis not taking effect

---
 quickstats/plots/two_axis_1D_plot.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/quickstats/plots/two_axis_1D_plot.py b/quickstats/plots/two_axis_1D_plot.py
index fae64a7d..6206e4a0 100644
--- a/quickstats/plots/two_axis_1D_plot.py
+++ b/quickstats/plots/two_axis_1D_plot.py
@@ -32,6 +32,8 @@ class TwoAxis1DPlot(General1DPlot):
         
         ax1 = self.draw_frame(logx=logx, logy=logy_first)
         ax2 = ax1.twinx()
+        if logy_second:
+            ax2.set_yscale('log')
         
         legend_order = []
         if isinstance(self.data_map, dict):
-- 
GitLab


From 5d2c628f9db6a323ec5c78079a9a0d1759081b28 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 08:41:52 +0100
Subject: [PATCH 10/18] update version

---
 quickstats/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quickstats/_version.py b/quickstats/_version.py
index 3b6ad5b9..06101b2d 100644
--- a/quickstats/_version.py
+++ b/quickstats/_version.py
@@ -1 +1 @@
-__version__ = "0.6.9.7"
+__version__ = "0.6.9.8"
-- 
GitLab


From 3c6f78ddb359ad27301a1c71f1b08ed0f2e49874 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 09:13:25 +0100
Subject: [PATCH 11/18] add utils for handling paths

---
 quickstats/utils/path_utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 quickstats/utils/path_utils.py

diff --git a/quickstats/utils/path_utils.py b/quickstats/utils/path_utils.py
new file mode 100644
index 00000000..0782d924
--- /dev/null
+++ b/quickstats/utils/path_utils.py
@@ -0,0 +1,17 @@
+import glob
+
+from typing import List, Union
+from pathlib import Path
+
+from .string_utils import split_str
+
+def resolve_paths(paths:Union[str, List[str]],
+                  sep:str=","):
+    if isinstance(paths, str):
+        paths = split_str(paths, strip=True, remove_empty=True)
+        return resolve_paths(paths, sep=sep)
+    resolved_paths = []
+    for path in paths:
+        resolved_paths.extend(glob.glob(path))
+    return resolved_paths
+
-- 
GitLab


From 25e906fa3f4e88332079fccf7932fdeb75c00dce Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 09:14:03 +0100
Subject: [PATCH 12/18] use path resolving subroutine for listing root files

---
 quickstats/interface/root/TFile.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/quickstats/interface/root/TFile.py b/quickstats/interface/root/TFile.py
index 82ed9d97..667f035f 100644
--- a/quickstats/interface/root/TFile.py
+++ b/quickstats/interface/root/TFile.py
@@ -6,6 +6,7 @@ import glob
 import numpy as np
 
 from quickstats import semistaticmethod
+from quickstats.utils.path_utils import resolve_paths
 from .TObject import TObject
 
 class TFile(TObject):
@@ -37,15 +38,13 @@ class TFile(TObject):
     @semistaticmethod
     def list_files(self, paths:Union[List[str], str],
                    strict_format:Optional[bool]=True):
-        if isinstance(paths, str):
-            return self.list_files([paths])
+        paths = resolve_paths(paths)
         filenames = []
         for path in paths:
             if os.path.isdir(path):
                 filenames.extend(glob.glob(os.path.join(path, "*")))
             else:
-                filenames.extend(glob.glob(path))
-        filenames = [filename for filename in filenames if os.path.isfile(filename)]
+                filenames.appentd(path)
         if strict_format:
             filenames = [filename for filename in filenames if self._is_valid_filename(filename)]
         if not filenames:
-- 
GitLab


From 9c208f3649b17c32d4c40f9fcbff2bbf340f0d1b Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 09:16:39 +0100
Subject: [PATCH 13/18] bugfix missing argument

---
 quickstats/utils/path_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quickstats/utils/path_utils.py b/quickstats/utils/path_utils.py
index 0782d924..e4b507a2 100644
--- a/quickstats/utils/path_utils.py
+++ b/quickstats/utils/path_utils.py
@@ -8,7 +8,7 @@ from .string_utils import split_str
 def resolve_paths(paths:Union[str, List[str]],
                   sep:str=","):
     if isinstance(paths, str):
-        paths = split_str(paths, strip=True, remove_empty=True)
+        paths = split_str(paths, sep=sep, strip=True, remove_empty=True)
         return resolve_paths(paths, sep=sep)
     resolved_paths = []
     for path in paths:
-- 
GitLab


From 2245b413e25dc3461ca9990c8d3b0b6e5532bc29 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 09:33:25 +0100
Subject: [PATCH 14/18] add methods for adding/removing python paths

---
 quickstats/utils/sys_utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 quickstats/utils/sys_utils.py

diff --git a/quickstats/utils/sys_utils.py b/quickstats/utils/sys_utils.py
new file mode 100644
index 00000000..409d9b3f
--- /dev/null
+++ b/quickstats/utils/sys_utils.py
@@ -0,0 +1,17 @@
+import os
+import sys
+
+def add_python_path(path:str):
+    if path not in sys.path:
+        sys.path.insert(0, path)
+    PYTHONPATH = os.environ.get("PYTHONPATH", "")
+    if path not in PYTHONPATH.split(":"):
+        os.enviro['PYTHONPATH'] = f"{path}:{PYTHONPATH}"
+
+def remove_python_path(path:str):
+    if path in sys.path:
+        sys.path.remove(path)
+    PYTHONPATHS = os.environ.get("PYTHONPATH", "").split(":")
+    if path in PYTHONPATHS:
+        PYTHONPATHS.remove(path)
+        os.environ["PYTHONPATH"] = ":".join(PYTHONPATHS)
\ No newline at end of file
-- 
GitLab


From cea4f3ef281b445220512ad96972b2061adff0ab Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 09:53:17 +0100
Subject: [PATCH 15/18] add method for modifying sys argv

---
 quickstats/utils/sys_utils.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/quickstats/utils/sys_utils.py b/quickstats/utils/sys_utils.py
index 409d9b3f..871da7ad 100644
--- a/quickstats/utils/sys_utils.py
+++ b/quickstats/utils/sys_utils.py
@@ -1,7 +1,16 @@
 import os
 import sys
+import shlex
+
+from typing import Optional
 
 def add_python_path(path:str):
+    """
+    Add a path to the Python search path and the PYTHONPATH environment variable if not already present.
+    
+    Parameters:
+    - path (str): Path to add.
+    """
     if path not in sys.path:
         sys.path.insert(0, path)
     PYTHONPATH = os.environ.get("PYTHONPATH", "")
@@ -9,9 +18,27 @@ def add_python_path(path:str):
-        os.enviro['PYTHONPATH'] = f"{path}:{PYTHONPATH}"
+        os.environ['PYTHONPATH'] = f"{path}:{PYTHONPATH}" if PYTHONPATH else path  # no trailing ':' (empty entry) when PYTHONPATH was unset
 
 def remove_python_path(path:str):
+    """
+    Remove a path from the Python search path and the PYTHONPATH environment variable if present.
+    """
     if path in sys.path:
         sys.path.remove(path)
     PYTHONPATHS = os.environ.get("PYTHONPATH", "").split(":")
     if path in PYTHONPATHS:
         PYTHONPATHS.remove(path)
-        os.environ["PYTHONPATH"] = ":".join(PYTHONPATHS)
\ No newline at end of file
+        os.environ["PYTHONPATH"] = ":".join(PYTHONPATHS)
+
+def set_argv(cmd: str, expandvars:bool=True):
+    """
+    Modifies sys.argv based on a given command line string.
+
+    Parameters:
+    - cmd (str): The command line string to parse into sys.argv.
+    - expandvars (bool, optional): Whether to expand environment variables in cmd. Defaults to True.
+    """
+    if expandvars:
+        cmd = os.path.expandvars(cmd)
+    # Use shlex.split to correctly parse the command line string into arguments,
+    # handling cases with quotes and escaped characters appropriately.
+    parsed_args = shlex.split(cmd)
+    sys.argv = parsed_args
\ No newline at end of file
-- 
GitLab


From a54d59e29aac2aa2e4f8657890ad65dcd4a9f592 Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 13:04:58 +0100
Subject: [PATCH 16/18] fix typo

---
 quickstats/interface/root/TFile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quickstats/interface/root/TFile.py b/quickstats/interface/root/TFile.py
index 667f035f..2e6f124f 100644
--- a/quickstats/interface/root/TFile.py
+++ b/quickstats/interface/root/TFile.py
@@ -44,7 +44,7 @@ class TFile(TObject):
             if os.path.isdir(path):
                 filenames.extend(glob.glob(os.path.join(path, "*")))
             else:
-                filenames.appentd(path)
+                filenames.append(path)
         if strict_format:
             filenames = [filename for filename in filenames if self._is_valid_filename(filename)]
         if not filenames:
-- 
GitLab


From da5ed298207071c5f942e9472d564d7e6a52607e Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Sun, 17 Mar 2024 13:40:24 +0100
Subject: [PATCH 17/18] fix missing data type for uproot conversion

---
 quickstats/utils/data_conversion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quickstats/utils/data_conversion.py b/quickstats/utils/data_conversion.py
index c9660b89..b5fcc47c 100644
--- a/quickstats/utils/data_conversion.py
+++ b/quickstats/utils/data_conversion.py
@@ -18,7 +18,7 @@ root_datatypes = ["bool", "Bool_t", "Byte_t", "char", "char*", "Char_t",
                   "unsigned long", "unsigned long long",
                   "unsigned short", "UShort_t"]
 
-uproot_datatypes = ["double", "float", "int", "int8_t", "int64_t", "char*", "int32_t", "uint64_t", "uint32_t"]
+uproot_datatypes = ["bool", "double", "float", "int", "int8_t", "int64_t", "char*", "int32_t", "uint64_t", "uint32_t"]
 
 class ConversionMode(DescriptiveEnum):
     ALL = (0, "Convert all variable types")
-- 
GitLab


From de022ad7f630c36dd1f6c061ab1447fe497c40dc Mon Sep 17 00:00:00 2001
From: Chi Lung Cheng <chi.lung.cheng@cern.ch>
Date: Mon, 18 Mar 2024 13:47:48 +0100
Subject: [PATCH 18/18] fix missing option

---
 quickstats/clis/workspace_tools.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/quickstats/clis/workspace_tools.py b/quickstats/clis/workspace_tools.py
index 1c14c257..5673fdf8 100644
--- a/quickstats/clis/workspace_tools.py
+++ b/quickstats/clis/workspace_tools.py
@@ -65,6 +65,8 @@ def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_fi
               help='Enable minimizer offsetting.')
 @click.option('--offset/--no-offset', default=True, show_default=True,
               help='Offset likelihood.')
+@click.option('--use-binned/--use-unbinned', default=False, show_default=True,
+              help='Whether to convert unbinned dataset to binned dataset.')
 @click.option('-c', '--num_cpu', type=int, default=1, show_default=True,
               help='Number of CPUs to use during minimization.')
 @click.option('--apply-fix/--do-not-apply-fix', default=False, show_default=True,
-- 
GitLab