From eb6684a5fdc749d542924b2435d687d71f216d39 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:17:11 +0100 Subject: [PATCH 01/18] add dataframe extension modules --- quickstats/extensions/__init__.py | 3 + quickstats/extensions/extension_dataframe.py | 102 +++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 quickstats/extensions/__init__.py create mode 100644 quickstats/extensions/extension_dataframe.py diff --git a/quickstats/extensions/__init__.py b/quickstats/extensions/__init__.py new file mode 100644 index 00000000..cb69ec0c --- /dev/null +++ b/quickstats/extensions/__init__.py @@ -0,0 +1,3 @@ +__all__ = ["ExtensionDataFrame"] + +from .extension_dataframe import ExtensionDataFrame \ No newline at end of file diff --git a/quickstats/extensions/extension_dataframe.py b/quickstats/extensions/extension_dataframe.py new file mode 100644 index 00000000..58d6c8b8 --- /dev/null +++ b/quickstats/extensions/extension_dataframe.py @@ -0,0 +1,102 @@ +from quickstats import AbstractObject + +class ExtensionDataFrame(AbstractObject): + + @property + def dataframe(self): + return self._df + + def __init__(self, df:"pandas.DataFrame", verbosity:str="INFO"): + super().__init__(verbosity=verbosity) + self._df = df + + def __repr__(self): + return self.dataframe.__repr__() + + def _repr_html_(self): + return self.dataframe._repr_html_() + + def _parse_argument(self, ops_name:str, index=None, columns=None, axis=None, **kwargs): + assert len(kwargs) == 1 + argname = list(kwargs)[0] + argval = kwargs[argname] + if (argval is None) and (index is None) and (columns is None): + raise TypeError(f"must pass an index to {ops_name}") + if (index is not None) or (columns is not None): + if axis is not None: + raise TypeError("cannot specify both 'axis' and any of 'index' or 'columns'") + if argval is not None: + raise TypeError(f"cannot specify both '{argname}' and any or 'index' or 'columns'") + else: + if axis and 
self.df._get_axis_number(axis) == 1: + columns = argval + else: + index = argval + return index, columns + + + def select_values(self, mapper=None, index=None, columns=None, axis=None, copy:bool=True, inplace:bool=False, invert:bool=False): + df = self._df + index, columns = self._parse_argument('select', index=index, columns=columns, + axis=axis, mapper=mapper) + # does not consider copy on write yet + result = self._df if inplace else self._df.copy(deep=copy) + index_ops = lambda x: result.index.get_level_values(x) + column_ops = lambda x: result[x] + for ops, maps in [(index_ops, index), (column_ops, columns)]: + if maps is None: + continue + for attribs, selections in maps.items(): + if not isinstance(attribs, tuple): + attribs = (attribs,) + selections = (selections,) + masks = None + for i, attrib in enumerate(attribs): + selection = selections[i] + if not callable(selection): + if isinstance(selection, (tuple, list)): + mask = ops(attrib).isin(selection) + else: + mask = ops(attrib) == selection + elif attrib is not None: + mask = ops(attrib).apply(selection) + else: + mask = result.apply(selection) + if masks is None: + masks = mask + else: + masks &= mask + if invert: + masks = ~masks + result = result[masks] + if inplace: + self._df = result + return None + return result + + def reject_values(self, mapper=None, index=None, columns=None, axis=None, copy:bool=True, inplace:bool=False): + return self.select_values(mapper=mapper, index=index, + columns=columns, axis=axis, + copy=copy, inplace=inplace, + invert=True) + + def concat(self, other, copy:bool=True, inplace:bool=False, order:str="first", **kwargs): + import pandas as pd + result = self._df if inplace else self._df.copy(deep=copy) + if order == "first": + if isinstance(other, list): + objs = [result] + other + else: + objs = [result, other] + elif order == "last": + if isinstance(other, list): + objs = other + [result] + else: + objs = [other, result] + else: + raise TypeError('order must be either 
"first" or "last"') + result = pd.concat(objs, **kwargs) + if inplace: + self._df = result + return None + return result \ No newline at end of file -- GitLab From ba54eb0cdded5c903f632190ce204a273b669e3b Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:23:09 +0100 Subject: [PATCH 02/18] restructure resource files --- quickstats/__init__.py | 2 +- .../mpl_stylesheets}/quick_default.mplstyle | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename quickstats/{stylesheets => resources/mpl_stylesheets}/quick_default.mplstyle (100%) diff --git a/quickstats/__init__.py b/quickstats/__init__.py index 3f9af632..ac476c09 100644 --- a/quickstats/__init__.py +++ b/quickstats/__init__.py @@ -16,8 +16,8 @@ os.environ['CPPYY_API_PATH'] = "none" module_path = pathlib.Path(__file__).parent.absolute() macro_path = os.path.join(module_path, 'macros') -stylesheet_path = os.path.join(module_path, 'stylesheets') resource_path = os.path.join(module_path, 'resources') +stylesheet_path = os.path.join(resource_path, 'mpl_stylesheets') # ROOT.gInterpreter.AddIncludePath(os.path.join(macro_path, "macros")) diff --git a/quickstats/stylesheets/quick_default.mplstyle b/quickstats/resources/mpl_stylesheets/quick_default.mplstyle similarity index 100% rename from quickstats/stylesheets/quick_default.mplstyle rename to quickstats/resources/mpl_stylesheets/quick_default.mplstyle -- GitLab From a031232961ec67185f8a25ea35d2468392a2bc22 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:25:08 +0100 Subject: [PATCH 03/18] new matplotlib stylesheets and rename default stylesheet --- quickstats/plots/__init__.py | 2 +- .../{quick_default.mplstyle => hep.mplstyle} | 0 .../resources/mpl_stylesheets/no_latex.mplstyle | 0 quickstats/resources/mpl_stylesheets/science.mplstyle | 11 +++++++++++ 4 files changed, 12 insertions(+), 1 deletion(-) rename quickstats/resources/mpl_stylesheets/{quick_default.mplstyle => 
hep.mplstyle} (100%) create mode 100644 quickstats/resources/mpl_stylesheets/no_latex.mplstyle create mode 100644 quickstats/resources/mpl_stylesheets/science.mplstyle diff --git a/quickstats/plots/__init__.py b/quickstats/plots/__init__.py index b55b4a6c..8827c86c 100644 --- a/quickstats/plots/__init__.py +++ b/quickstats/plots/__init__.py @@ -29,7 +29,7 @@ from matplotlib import style, colors style.core.USER_LIBRARY_PATHS.append(quickstats.stylesheet_path) style.core.reload_library() -style.use("quick_default") +style.use("hep") register_colors(EXTRA_COLORS) register_cmaps(QUICKSTATS_PALETTES) \ No newline at end of file diff --git a/quickstats/resources/mpl_stylesheets/quick_default.mplstyle b/quickstats/resources/mpl_stylesheets/hep.mplstyle similarity index 100% rename from quickstats/resources/mpl_stylesheets/quick_default.mplstyle rename to quickstats/resources/mpl_stylesheets/hep.mplstyle diff --git a/quickstats/resources/mpl_stylesheets/no_latex.mplstyle b/quickstats/resources/mpl_stylesheets/no_latex.mplstyle new file mode 100644 index 00000000..e69de29b diff --git a/quickstats/resources/mpl_stylesheets/science.mplstyle b/quickstats/resources/mpl_stylesheets/science.mplstyle new file mode 100644 index 00000000..45540391 --- /dev/null +++ b/quickstats/resources/mpl_stylesheets/science.mplstyle @@ -0,0 +1,11 @@ +text.usetex: False +mathtext.fontset : dejavuserif +mathtext.default: it + +font.family : serif +font.serif: DejaVu Serif, Times, Times Roman, Times New Roman, Nimbus Roman, FreeSerif +font.monospace: Tex Gyre Cursor, Courier, Courier New, Nimbus Mono, FreeMono + +legend.frameon: False +legend.framealpha: 0.75 +legend.fancybox: False \ No newline at end of file -- GitLab From 9165ecd7a77253a732490f3a95e26e354b464ac4 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:28:35 +0100 Subject: [PATCH 04/18] add decorator for checking module requirements --- quickstats/core/decorators.py | 14 ++++++++++++++ 1 
file changed, 14 insertions(+) diff --git a/quickstats/core/decorators.py b/quickstats/core/decorators.py index 95c9d700..0e4b96a7 100644 --- a/quickstats/core/decorators.py +++ b/quickstats/core/decorators.py @@ -1,5 +1,7 @@ +from typing import List from functools import partial import time +import importlib class semistaticmethod(object): """ @@ -35,6 +37,18 @@ class semistaticmethod(object): def __func__(self): return self.f +def require_package(pkg_names:str): + def check_package(func): + def wrapper(*args, **kwargs): + missing_pkgs = [name for name in pkg_names if importlib.util.find_spec(name) is None] + if missing_pkgs: + func_name = func.__name__ + raise RuntimeError(f'missing modules required by the function {func_name}: {", ".join(missing_pkgs)}') + result = function(*args, **kwargs) + return result + return wrapper + return check_package + def cls_method_timer(func): """ Decorator function to measure the execution time of a class method. -- GitLab From 5d67c449628c43dbaf3014d701884b55a7f86c0f Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:35:19 +0100 Subject: [PATCH 05/18] make reloading matplotlib stylesheet a subroutine --- quickstats/plots/__init__.py | 6 ++---- quickstats/plots/core.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/quickstats/plots/__init__.py b/quickstats/plots/__init__.py index 8827c86c..722d53a1 100644 --- a/quickstats/plots/__init__.py +++ b/quickstats/plots/__init__.py @@ -26,10 +26,8 @@ from .bidirectional_bar_chart import BidirectionalBarChart from matplotlib import style, colors # Reference from https://github.com/beojan/atlas-mpl - -style.core.USER_LIBRARY_PATHS.append(quickstats.stylesheet_path) -style.core.reload_library() -style.use("hep") +reload_styles() +use_style('hep') register_colors(EXTRA_COLORS) register_cmaps(QUICKSTATS_PALETTES) \ No newline at end of file diff --git a/quickstats/plots/core.py b/quickstats/plots/core.py index 
5c4bae8c..2c1eaec6 100644 --- a/quickstats/plots/core.py +++ b/quickstats/plots/core.py @@ -3,6 +3,7 @@ from cycler import cycler import numpy as np +import quickstats from quickstats import DescriptiveEnum from quickstats.utils.common_utils import combine_dict @@ -144,4 +145,12 @@ def get_color_cycle(source:Optional[Union[List, str, "ListedColorMap"]]="default colors = source.colors return get_color_cycle(colors) return (cycler(color=source)) - \ No newline at end of file + +def reload_styles(): + from matplotlib import style + style.core.USER_LIBRARY_PATHS.append(quickstats.stylesheet_path) + style.core.reload_library() + +def use_style(name:str='quick_default'): + from matplotlib import style + style.use(name) \ No newline at end of file -- GitLab From 0b557e0df48eb79e62e8313b4cbb49d130d864e7 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:36:39 +0100 Subject: [PATCH 06/18] allow more flexible text drawing in plots --- quickstats/plots/abstract_plot.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/quickstats/plots/abstract_plot.py b/quickstats/plots/abstract_plot.py index 1d1daab3..6306bbe5 100644 --- a/quickstats/plots/abstract_plot.py +++ b/quickstats/plots/abstract_plot.py @@ -1,5 +1,6 @@ from typing import Optional, Union, Dict, List, Tuple, Callable, Sequence from cycler import cycler +from itertools import cycle import numpy as np import matplotlib @@ -9,7 +10,7 @@ from quickstats.plots import get_color_cycle, get_cmap from quickstats.plots.color_schemes import QUICKSTATS_PALETTES from quickstats.plots.template import (single_frame, parse_styles, format_axis_ticks, parse_analysis_label_options, centralize_axis, - create_transform) + create_transform, draw_multiline_text) from quickstats.utils.common_utils import combine_dict, insert_periodic_substr from quickstats.maths.statistics import bin_center_to_bin_edge, get_hist_comparison_data from quickstats.maths.statistics 
import HistComparisonMode @@ -87,6 +88,7 @@ class AbstractPlot(AbstractObject): if color_cycle is None: color_cycle = self.COLOR_CYCLE self.cmap = get_cmap(color_cycle) + self.color_cycle = cycle(self.cmap.colors) def get_hep_data(self): return combine_dict(self.hep_data) @@ -212,11 +214,16 @@ class AbstractPlot(AbstractObject): if title is not None: ax.set_title(title, **self.styles['title']) - def draw_text(self, ax, text:str, x, y, transform_x:str="axis", transform_y:str="axis", **kwargs): - transform = create_transform(transform_x=transform_x, - transform_y=transform_y) + def draw_text(self, ax, text:str, x, y, + dy:float=0.05, + transform_x:str="axis", + transform_y:str="axis", + **kwargs): styles = combine_dict(self.styles['text'], kwargs) - ax.text(x, y, text, transform=transform, **styles) + draw_multiline_text(ax, x, y, text, dy=dy, + transform_x=transform_x, + transform_y=transform_y, + **styles) def draw_cbar_label(self, cbar, cbarlabel:Optional[str]=None, combined_styles:Optional[Dict]=None): -- GitLab From e323185ce9af1c79eb75ca6e687f41ba8bb0f131 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:38:12 +0100 Subject: [PATCH 07/18] more organized way to draw errorbands in General1DPlot --- quickstats/plots/general_1D_plot.py | 39 +++++++++++++++++++---------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/quickstats/plots/general_1D_plot.py b/quickstats/plots/general_1D_plot.py index 53807464..c886ed92 100644 --- a/quickstats/plots/general_1D_plot.py +++ b/quickstats/plots/general_1D_plot.py @@ -8,13 +8,19 @@ from quickstats.utils.common_utils import combine_dict class General1DPlot(AbstractPlot): - CONFIG = { - 'errorband_plot_styles': { - 'alpha': 1, - 'hatch': '/' + STYLES = { + 'fill_between': { + 'alpha': 0.3, + 'hatch': None, + 'linewidth': 1.0, + 'edgecolor': 'k' } } + CONFIG = { + 'errorband_legend': True + } + def __init__(self, data_map:Union[pd.DataFrame, Dict[str, pd.DataFrame]], 
label_map:Optional[Dict]=None, styles_map:Optional[Dict]=None, @@ -58,21 +64,24 @@ class General1DPlot(AbstractPlot): stat_configs:Optional[List[StatPlotConfig]]=None, styles:Optional[Dict]=None, label:Optional[str]=None): - x = data[xattrib].values - y = data[yattrib].values + data = data.reset_index() + x, y = data[xattrib].values, data[yattrib].values indices = np.argsort(x) - x = x[indices] - y = y[indices] + x, y = x[indices], y[indices] draw_styles = combine_dict(self.styles['plot'], styles) - + fill_styles = combine_dict(self.styles['fill_between']) + if ('color' in draw_styles) and ('color' not in fill_styles): + fill_styles['color'] = draw_styles['color'] + if (yerrloattrib is not None) and (yerrhiattrib is not None): yerrlo = data[yerrloattrib][indices] yerrhi = data[yerrhiattrib][indices] handle_fill = ax.fill_between(x, yerrlo, yerrhi, - #label=..., - **self.config["errorband_plot_styles"]) + **fill_styles) + else: + handle_fill = None - handle = ax.plot(x, y, **draw_styles, label=label) + handle_plot = ax.plot(x, y, **draw_styles, label=label) if stat_configs is not None: stat_handles = [] for stat_config in stat_configs: @@ -81,7 +90,11 @@ class General1DPlot(AbstractPlot): stat_handles.append(stat_handle) else: stat_handles = None - return handle[0], stat_handles + if self.config['errorband_legend'] and (handle_fill is not None): + handles = (handle_plot[0], handle_fill) + else: + handles = handle_plot[0] + return handles, stat_handles def draw(self, xattrib:str, yattrib:str, yerrloattrib:Optional[str]=None, -- GitLab From 93154470425cde84cd6a74d7839433c2d2f88614 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:39:40 +0100 Subject: [PATCH 08/18] ensure correct color cycle in two axis plot --- quickstats/plots/two_axis_1D_plot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/quickstats/plots/two_axis_1D_plot.py b/quickstats/plots/two_axis_1D_plot.py index db72f946..fae64a7d 
100644 --- a/quickstats/plots/two_axis_1D_plot.py +++ b/quickstats/plots/two_axis_1D_plot.py @@ -48,6 +48,8 @@ class TwoAxis1DPlot(General1DPlot): for target in targets: data = self.data_map[target] styles = styles_map.get(target, None) + if styles is None: + styles = {} label = label_map.get(target, "") if draw_stats: if target in self.stat_configs: @@ -58,6 +60,8 @@ class TwoAxis1DPlot(General1DPlot): stat_configs = None else: stat_configs = None + if ('color' not in styles): + styles['color'] = next(self.color_cycle) handle, stat_handles = self.draw_single_data(ax, data, xattrib=xattrib, yattrib=yattrib, @@ -96,4 +100,4 @@ class TwoAxis1DPlot(General1DPlot): self.set_axis_range(ax1, xmin=xmin, xmax=xmax, ymin=ymin_first, ymax=ymax_first, ypad=ypad_first) self.set_axis_range(ax2, xmin=xmin, xmax=xmax, ymin=ymin_second, ymax=ymax_second, ypad=ypad_second) - return ax1, ax2 + return ax1, ax2 \ No newline at end of file -- GitLab From ca2428f6f2e42e31c4e94dd79f2d4242bb54fdc9 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:40:12 +0100 Subject: [PATCH 09/18] bugfix logy for second axis not taking effect --- quickstats/plots/two_axis_1D_plot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quickstats/plots/two_axis_1D_plot.py b/quickstats/plots/two_axis_1D_plot.py index fae64a7d..6206e4a0 100644 --- a/quickstats/plots/two_axis_1D_plot.py +++ b/quickstats/plots/two_axis_1D_plot.py @@ -32,6 +32,8 @@ class TwoAxis1DPlot(General1DPlot): ax1 = self.draw_frame(logx=logx, logy=logy_first) ax2 = ax1.twinx() + if logy_second: + ax2.set_yscale('log') legend_order = [] if isinstance(self.data_map, dict): -- GitLab From 5d2c628f9db6a323ec5c78079a9a0d1759081b28 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 08:41:52 +0100 Subject: [PATCH 10/18] update version --- quickstats/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickstats/_version.py 
b/quickstats/_version.py index 3b6ad5b9..06101b2d 100644 --- a/quickstats/_version.py +++ b/quickstats/_version.py @@ -1 +1 @@ -__version__ = "0.6.9.7" +__version__ = "0.6.9.8" -- GitLab From 3c6f78ddb359ad27301a1c71f1b08ed0f2e49874 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 09:13:25 +0100 Subject: [PATCH 11/18] add utils for handling paths --- quickstats/utils/path_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 quickstats/utils/path_utils.py diff --git a/quickstats/utils/path_utils.py b/quickstats/utils/path_utils.py new file mode 100644 index 00000000..0782d924 --- /dev/null +++ b/quickstats/utils/path_utils.py @@ -0,0 +1,17 @@ +import glob + +from typing import List, Union +from pathlib import Path + +from .string_utils import split_str + +def resolve_paths(paths:Union[str, List[str]], + sep:str=","): + if isinstance(paths, str): + paths = split_str(paths, strip=True, remove_empty=True) + return resolve_paths(paths, sep=sep) + resolved_paths = [] + for path in paths: + resolved_paths.extend(glob.glob(path)) + return resolved_paths + -- GitLab From 25e906fa3f4e88332079fccf7932fdeb75c00dce Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 09:14:03 +0100 Subject: [PATCH 12/18] use path resolving subroutine for listing root files --- quickstats/interface/root/TFile.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/quickstats/interface/root/TFile.py b/quickstats/interface/root/TFile.py index 82ed9d97..667f035f 100644 --- a/quickstats/interface/root/TFile.py +++ b/quickstats/interface/root/TFile.py @@ -6,6 +6,7 @@ import glob import numpy as np from quickstats import semistaticmethod +from quickstats.utils.path_utils import resolve_paths from .TObject import TObject class TFile(TObject): @@ -37,15 +38,13 @@ class TFile(TObject): @semistaticmethod def list_files(self, paths:Union[List[str], str], 
strict_format:Optional[bool]=True): - if isinstance(paths, str): - return self.list_files([paths]) + paths = resolve_paths(paths) filenames = [] for path in paths: if os.path.isdir(path): filenames.extend(glob.glob(os.path.join(path, "*"))) else: - filenames.extend(glob.glob(path)) - filenames = [filename for filename in filenames if os.path.isfile(filename)] + filenames.appentd(path) if strict_format: filenames = [filename for filename in filenames if self._is_valid_filename(filename)] if not filenames: -- GitLab From 9c208f3649b17c32d4c40f9fcbff2bbf340f0d1b Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 09:16:39 +0100 Subject: [PATCH 13/18] bugfix missing argument --- quickstats/utils/path_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickstats/utils/path_utils.py b/quickstats/utils/path_utils.py index 0782d924..e4b507a2 100644 --- a/quickstats/utils/path_utils.py +++ b/quickstats/utils/path_utils.py @@ -8,7 +8,7 @@ from .string_utils import split_str def resolve_paths(paths:Union[str, List[str]], sep:str=","): if isinstance(paths, str): - paths = split_str(paths, strip=True, remove_empty=True) + paths = split_str(paths, sep=sep, strip=True, remove_empty=True) return resolve_paths(paths, sep=sep) resolved_paths = [] for path in paths: -- GitLab From 2245b413e25dc3461ca9990c8d3b0b6e5532bc29 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 09:33:25 +0100 Subject: [PATCH 14/18] add methods for adding/removing python paths --- quickstats/utils/sys_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 quickstats/utils/sys_utils.py diff --git a/quickstats/utils/sys_utils.py b/quickstats/utils/sys_utils.py new file mode 100644 index 00000000..409d9b3f --- /dev/null +++ b/quickstats/utils/sys_utils.py @@ -0,0 +1,17 @@ +import os +import sys + +def add_python_path(path:str): + if path not in sys.path: + sys.path.insert(0, 
path) + PYTHONPATH = os.environ.get("PYTHONPATH", "") + if path not in PYTHONPATH.split(":"): + os.enviro['PYTHONPATH'] = f"{path}:{PYTHONPATH}" + +def remove_python_path(path:str): + if path in sys.path: + sys.path.remove(path) + PYTHONPATHS = os.environ.get("PYTHONPATH", "").split(":") + if path in PYTHONPATHS: + PYTHONPATHS.remove(path) + os.environ["PYTHONPATH"] = ":".join(PYTHONPATHS) \ No newline at end of file -- GitLab From cea4f3ef281b445220512ad96972b2061adff0ab Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 09:53:17 +0100 Subject: [PATCH 15/18] add method for modifying sys argv --- quickstats/utils/sys_utils.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/quickstats/utils/sys_utils.py b/quickstats/utils/sys_utils.py index 409d9b3f..871da7ad 100644 --- a/quickstats/utils/sys_utils.py +++ b/quickstats/utils/sys_utils.py @@ -1,7 +1,16 @@ import os import sys +import shlex + +from typing import Optional def add_python_path(path:str): + """ + Add a path to the Python search path and the PYTHONPATH environment variable if not already present. + + Parameters: + - path (str): Path to add. + """ if path not in sys.path: sys.path.insert(0, path) PYTHONPATH = os.environ.get("PYTHONPATH", "") @@ -9,9 +18,27 @@ def add_python_path(path:str): os.enviro['PYTHONPATH'] = f"{path}:{PYTHONPATH}" def remove_python_path(path:str): + """ + Remove a path from the Python search path and the PYTHONPATH environment variable if present. + """ if path in sys.path: sys.path.remove(path) PYTHONPATHS = os.environ.get("PYTHONPATH", "").split(":") if path in PYTHONPATHS: PYTHONPATHS.remove(path) - os.environ["PYTHONPATH"] = ":".join(PYTHONPATHS) \ No newline at end of file + os.environ["PYTHONPATH"] = ":".join(PYTHONPATHS) + +def set_argv(cmd: str, expandvars:bool=True): + """ + Modifies sys.argv based on a given command line string. 
+ + Parameters: + - cmd (str): The command line string to parse into sys.argv. + - expandvars (bool, optional): Whether to expand environment variables in cmd. Defaults to True. + """ + if expandvars: + cmd = os.path.expandvars(cmd) + # Use shlex.split to correctly parse the command line string into arguments, + # handling cases with quotes and escaped characters appropriately. + parsed_args = shlex.split(cmd) + sys.argv = parsed_args \ No newline at end of file -- GitLab From a54d59e29aac2aa2e4f8657890ad65dcd4a9f592 Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 13:04:58 +0100 Subject: [PATCH 16/18] fix typo --- quickstats/interface/root/TFile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickstats/interface/root/TFile.py b/quickstats/interface/root/TFile.py index 667f035f..2e6f124f 100644 --- a/quickstats/interface/root/TFile.py +++ b/quickstats/interface/root/TFile.py @@ -44,7 +44,7 @@ class TFile(TObject): if os.path.isdir(path): filenames.extend(glob.glob(os.path.join(path, "*"))) else: - filenames.appentd(path) + filenames.append(path) if strict_format: filenames = [filename for filename in filenames if self._is_valid_filename(filename)] if not filenames: -- GitLab From da5ed298207071c5f942e9472d564d7e6a52607e Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Sun, 17 Mar 2024 13:40:24 +0100 Subject: [PATCH 17/18] fix missing data type for uproot conversion --- quickstats/utils/data_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickstats/utils/data_conversion.py b/quickstats/utils/data_conversion.py index c9660b89..b5fcc47c 100644 --- a/quickstats/utils/data_conversion.py +++ b/quickstats/utils/data_conversion.py @@ -18,7 +18,7 @@ root_datatypes = ["bool", "Bool_t", "Byte_t", "char", "char*", "Char_t", "unsigned long", "unsigned long long", "unsigned short", "UShort_t"] -uproot_datatypes = ["double", "float", "int", "int8_t", 
"int64_t", "char*", "int32_t", "uint64_t", "uint32_t"] +uproot_datatypes = ["bool", "double", "float", "int", "int8_t", "int64_t", "char*", "int32_t", "uint64_t", "uint32_t"] class ConversionMode(DescriptiveEnum): ALL = (0, "Convert all variable types") -- GitLab From de022ad7f630c36dd1f6c061ab1447fe497c40dc Mon Sep 17 00:00:00 2001 From: Chi Lung Cheng <chi.lung.cheng@cern.ch> Date: Mon, 18 Mar 2024 13:47:48 +0100 Subject: [PATCH 18/18] fix missing option --- quickstats/clis/workspace_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quickstats/clis/workspace_tools.py b/quickstats/clis/workspace_tools.py index 1c14c257..5673fdf8 100644 --- a/quickstats/clis/workspace_tools.py +++ b/quickstats/clis/workspace_tools.py @@ -65,6 +65,8 @@ def inspect_ws(input_file, ws_name=None, data_name=None, mc_name=None, output_fi help='Enable minimizer offsetting.') @click.option('--offset/--no-offset', default=True, show_default=True, help='Offset likelihood.') +@click.option('--use-binned/--use-unbinned', default=False, show_default=True, + help='Whether to convert unbinned dataset to binned dataset.') @click.option('-c', '--num_cpu', type=int, default=1, show_default=True, help='Number of CPUs to use during minimization.') @click.option('--apply-fix/--do-not-apply-fix', default=False, show_default=True, -- GitLab