Commit ba94a567 authored by Alkaid Cheng

Merge branch 'dev2' into 'master'

Dev2

See merge request !91
parents 78514eda 78130b36
Pipeline #5687845 passed
__version__ = "0.6.8.3"
__version__ = "0.6.8.4"
......@@ -63,6 +63,7 @@ class NTupleProcessTool(ConfigurableObject):
def __init__(self, sample_config:Union[Dict, str], outdir:str='output',
processor_config:Optional[str]=None,
processor_flags:Optional[List[str]]=None,
cache:bool=True,
use_template:bool=False,
multithread:bool=True,
disable_config_message:bool=False,
......@@ -81,6 +82,7 @@ class NTupleProcessTool(ConfigurableObject):
self.processor = None
if processor_config is not None:
self.load_processor_config(processor_config,
cache=cache,
use_template=use_template,
multithread=multithread)
......@@ -99,6 +101,11 @@ class NTupleProcessTool(ConfigurableObject):
self.path_manager.set_file("sample_config", config_path)
self.load_config(config_source)
if 'systematic_samples' not in self.config:
self.config['systematic_samples'] = {}
if 'systematics' not in self.config:
self.config['systematics'] = {}
if 'Nominal' in self.sample_config['systematic_samples']:
raise ValueError('Nominal samples should be placed in the "samples" key '
'(instead of "systematic_samples")')
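For illustration, a minimal sample configuration consistent with the defaults and the check above; only the top-level keys ("samples", "systematic_samples", "systematics") come from this hunk, and the nesting underneath is a hypothetical layout:

    # hypothetical layout -- only the top-level keys are taken from the code above
    sample_config = {
        "samples": {"ggF": ["ggF_nominal.root"]},   # nominal samples live here
        "systematic_samples": {                     # must NOT contain a "Nominal" key
            "JES__1up": {"ggF": ["ggF_JES__1up.root"]}
        },
        "systematics": {}
    }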
......@@ -174,13 +181,15 @@ class NTupleProcessTool(ConfigurableObject):
import pandas as pd
index_list = ['syst_theme', 'sample', 'syst_name', 'sample_type', 'syst_var']
attribute_df = pd.DataFrame(attribute_data).set_index(index_list)
return attribute_df
return attribute_df
def load_processor_config(self, config_path:str,
cache:bool=True,
multithread:bool=True,
use_template:bool=False):
from quickstats.components.processors import RooProcessor
self.processor = RooProcessor(config_path,
cache=cache,
use_template=use_template,
multithread=multithread,
verbosity=self.stdout.verbosity)
......
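A usage sketch for the new cache switch; the import path and file names are assumptions, while the keyword arguments match the constructor in the hunk above:

    # import path and file names are assumptions
    from quickstats.components import NTupleProcessTool

    tool = NTupleProcessTool("sample_config.json", outdir="output",
                             processor_config="processor_config.txt",
                             cache=False,        # ignore cached outputs and re-process
                             use_template=False,
                             multithread=True)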
......@@ -37,7 +37,7 @@ import click
@click.option('-f', '--fix', 'fix_param', default="", show_default=True,
help='Parameters to fix')
@click.option('--pois', default="", show_default=True,
help='Define the set of POIs (separated by commas) sed for calculating Minos errors.')
help='Define the set of POIs (separated by commas) set for calculating Minos errors.')
@click.option('--constrain/--no-constrain', 'constrain_nuis', default=True, show_default=True,
help='Use constrained NLL (i.e. include systematics)')
@click.option('-t', '--minimizer_type', default="Minuit2", show_default=True,
......
......@@ -4,6 +4,8 @@ import json
from .rooproc_helper_action import RooProcHelperAction
from quickstats.utils.common_utils import is_valid_file
class RooProcExport(RooProcHelperAction):
def __init__(self, filename:str):
super().__init__(filename=filename)
......@@ -15,6 +17,9 @@ class RooProcExport(RooProcHelperAction):
def _execute(self, processor:"quickstats.RooProcessor", **params):
filename = params['filename']
if processor.cache and is_valid_file(filename):
processor.stdout.info(f"INFO: Cached output `{filename}`.")
return processor
data = {k:v.GetValue() for k,v in processor.external_variables.items()}
dirname = os.path.dirname(filename)
if dirname and (not os.path.exists(dirname)):
......
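When the cache check does not fire, the action evaluates the deferred results and writes them out. A self-contained sketch of the assumed behavior (the json.dump step is inferred from the json import and is not shown in this hunk):

    import os
    import json

    def export_values(external_variables, filename):
        # mirrors RooProcExport._execute: evaluate deferred RDF results
        # and dump them to disk; the JSON output format is an assumption,
        # inferred from the json import at the top of the file
        data = {k: v.GetValue() for k, v in external_variables.items()}
        dirname = os.path.dirname(filename)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        with open(filename, "w") as f:
            json.dump(data, f, indent=2)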
......@@ -3,16 +3,18 @@ import fnmatch
from .rooproc_hybrid_action import RooProcHybridAction
from quickstats.utils.common_utils import is_valid_file
from quickstats.utils.common_utils import is_valid_file, filter_by_wildcards
class RooProcSave(RooProcHybridAction):
def __init__(self, treename:str, filename:str,
columns:Optional[List[str]]=None,
exclude:Optional[List[str]]=None,
frame:Optional[str]=None):
super().__init__(treename=treename,
filename=filename,
columns=columns,
exclude=exclude,
frame=frame)
@classmethod
......@@ -26,26 +28,23 @@ class RooProcSave(RooProcHybridAction):
if processor.cache and is_valid_file(filename):
processor.stdout.info(f'INFO: Cached output from "{filename}".')
return rdf, processor
all_columns = [str(c) for c in rdf.GetColumnNames()]
columns = params.get('columns', None)
exclude = params.get('exclude', None)
self.makedirs(filename)
if isinstance(columns, str):
columns = self.parse_as_list(columns)
if columns is None:
if processor.use_template:
from quickstats.utils.root_utils import templated_rdf_snapshot
rdf_next = templated_rdf_snapshot(rdf)(treename, filename)
else:
rdf_next = rdf.Snapshot(treename, filename)
columns = list(all_columns)
if exclude is None:
exclude = []
save_columns = filter_by_wildcards(all_columns, columns)
save_columns = filter_by_wildcards(save_columns, exclude, exclusion=True)
save_columns = list(set(save_columns))
if processor.use_template:
from quickstats.utils.root_utils import templated_rdf_snapshot
rdf_next = templated_rdf_snapshot(rdf, save_columns)(treename, filename, save_columns)
else:
all_columns = [str(c) for c in rdf.GetColumnNames()]
save_columns = []
for column in columns:
save_columns += [c for c in all_columns if fnmatch.fnmatch(c, column)]
save_columns = list(set(save_columns))
self.makedirs(filename)
if processor.use_template:
from quickstats.utils.root_utils import templated_rdf_snapshot
rdf_next = templated_rdf_snapshot(rdf, save_columns)(treename, filename, save_columns)
else:
rdf_next = rdf.Snapshot(treename, filename, save_columns)
processor.stdout.info(f'INFO: Writing output to "{filename}".')
rdf_next = rdf.Snapshot(treename, filename, save_columns)
processor.stdout.info(f'Writing output to "{filename}".')
return rdf_next, processor
\ No newline at end of file
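The rewritten column selection delegates the fnmatch loop to filter_by_wildcards, applying inclusion patterns first and exclusion patterns second. A minimal sketch of the assumed semantics, not the actual quickstats implementation:

    import fnmatch

    def filter_by_wildcards(items, patterns, exclusion=False):
        # assumed semantics: keep items matching any pattern;
        # with exclusion=True, keep items matching none of the patterns
        matched = [i for i in items if any(fnmatch.fnmatch(i, p) for p in patterns)]
        if exclusion:
            return [i for i in items if i not in matched]
        return matched

    all_columns = ["pt", "eta", "weight_sys_up", "weight_sys_dn"]
    keep = filter_by_wildcards(all_columns, ["pt", "weight_*"])  # pt + both weights
    keep = filter_by_wildcards(keep, ["*_dn"], exclusion=True)   # drops weight_sys_dn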
......@@ -13,6 +13,6 @@ class RooProcSaveFrame(RooProcHelperAction):
def _execute(self, processor:"quickstats.RooProcessor", **params):
frame_name = params['name']
if frame_name in processor.rdf_frames:
processor.stdout.warning(f"WARNING: Overriding existing rdf frame `{frame_name}`")
processor.stdout.warning(f'Overriding existing rdf frame "{frame_name}"')
processor.rdf_frames[frame_name] = processor.rdf
return processor
\ No newline at end of file
......@@ -154,7 +154,10 @@ class RooProcessor(AbstractObject):
def run(self, filename:Union[List[str], str]):
self.sanity_check()
all_files = self._get_all_files(filename)
if len(all_files) == 1:
if len(all_files) == 0:
self.stdout.info(f'No files to be processed. Skipped.')
return None
elif len(all_files) == 1:
self.stdout.info(f'Processing file "{all_files[0]}".')
else:
self.stdout.info(f"Processing files")
......
......@@ -202,13 +202,25 @@ def min_max_to_range(min_val:Optional[float]=None, max_val:Optional[float]=None)
if (min_val is not None) and (max_val is not None):
return (min_val, max_val)
raise ValueError("min and max values must be both None or both float")
def get_clipped_data(x:np.ndarray,
bin_range:Optional[Sequence]=None,
clip_lower:bool=True,
clip_upper:bool=True):
if (bin_range is None) or ((clip_lower == False) and (clip_upper == False)):
return np.array(x)
xmin = bin_range[0] if clip_lower else None
xmax = bin_range[1] if clip_upper else None
return np.clip(x, xmin, xmax)
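For example, clipping to the bin range moves out-of-range entries onto the boundaries so they fall into the first or last bin:

    import numpy as np

    x = np.array([-3.0, 0.5, 1.2, 9.0])
    get_clipped_data(x, bin_range=(0.0, 2.0), clip_lower=True, clip_upper=True)
    # -> array([0. , 0.5, 1.2, 2. ])
    get_clipped_data(x, bin_range=(0.0, 2.0), clip_lower=False, clip_upper=False)
    # -> array([-3. ,  0.5,  1.2,  9. ])  (no clipping, just a copy)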
def histogram(x:np.ndarray, weights:Optional[np.ndarray]=None,
bins:Union[int, Sequence]=10,
bin_range:Optional[Sequence]=None,
underflow:bool=False,
overflow:bool=False,
normalize:bool=True,
clip_weight:bool=False,
evaluate_error:bool=True,
evaluate_error:bool=False,
error_option:Union[BinErrorOption, str]="auto"):
"""
Compute the histogram of a data array.
......@@ -228,14 +240,19 @@ def histogram(x:np.ndarray, weights:Optional[np.ndarray]=None,
bin_range: (optional) sequence of the form (float, float)
The lower and upper range of the bins. If not provided, range is simply
``(x.min(), x.max())``. Values outside the range are ignored.
underflow: bool, default = False
Include underflow data in the first bin.
overflow: bool, default = False
Include overflow data in the last bin.
normalize: bool, default = True
If True, the sum of bin contents is normalized to one.
clip_weight: bool, default = True
If True, ignore data outside given range when evaluating total weight
Normalize the sum of weights to one. Weights outside the bin range will
not be counted if ``clip_weight`` is set to false, so the sum of bin
content could be less than one.
clip_weight: bool, default = False
Ignore data outside given range when evaluating total weight
used in normalization.
evaluate_error: bool, default = True
If True, evaluate the error of the bin contents using the given error
option.
Evaluate the error of the bin contents using the given error option.
error_option: BinErrorOption or str, default = "auto"
How to evaluate bin errors. If "sumw2", symmetric errors from the Wald
approximation is used (square root of sum of squares of weights). If
......@@ -252,7 +269,8 @@ def histogram(x:np.ndarray, weights:Optional[np.ndarray]=None,
bin_errors: np.ndarray
The bin errors of the histogram.
"""
x = np.array(x)
x = get_clipped_data(x, bin_range=bin_range, clip_lower=underflow,
clip_upper=overflow)
if weights is None:
weights = np.ones(x.shape)
......@@ -270,6 +288,7 @@ def histogram(x:np.ndarray, weights:Optional[np.ndarray]=None,
norm_factor = 1
bin_content, bin_edges = np.histogram(x, bins=bins, range=bin_range, weights=weights)
if evaluate_error:
error_option = BinErrorOption.parse(error_option)
if error_option == BinErrorOption.AUTO:
......@@ -277,7 +296,7 @@ def histogram(x:np.ndarray, weights:Optional[np.ndarray]=None,
error_option = BinErrorOption.POISSON if unit_weight else BinErrorOption.SUMW2
if error_option == BinErrorOption.POISSON:
pois_interval = get_poisson_interval(bin_content)
bin_errors = (pois_interval["lo"] / norm_factor, pois_interval["hi"] / norm_factor)
bin_errors = (pois_interval["lo"], pois_interval["hi"])
elif error_option == BinErrorOption.SUMW2:
bin_content_weight2, _ = np.histogram(x, bins=bins, range=bin_range, weights=weights**2)
bin_errors = np.sqrt(bin_content_weight2)
......@@ -297,6 +316,8 @@ def histogram(x:np.ndarray, weights:Optional[np.ndarray]=None,
def get_hist_data(x:np.ndarray, weights:Optional[np.ndarray]=None,
bins:Union[int, Sequence]=10,
bin_range:Optional[Sequence]=None,
underflow:bool=False,
overflow:bool=False,
normalize:bool=True,
clip_weight:bool=False,
xerr:bool=True,
......@@ -320,8 +341,14 @@ def get_hist_data(x:np.ndarray, weights:Optional[np.ndarray]=None,
bin_range: (optional) sequence of the form (float, float)
The lower and upper range of the bins. If not provided, range is simply
``(x.min(), x.max())``. Values outside the range are ignored.
underflow: bool, default = False
Include underflow data in the first bin.
overflow: bool, default = False
Include overflow data in the last bin.
normalize: bool, default = True
If True, the sum of bin contents is normalized to one.
Normalize the sum of weights to one. Weights outside the bin range will
not be counted if ``clip_weight`` is set to false, so the sum of bin
content could be less than one.
clip_weight: bool, default = True
If True, ignore data outside given range when evaluating total weight
used in normalization.
......@@ -345,6 +372,8 @@ def get_hist_data(x:np.ndarray, weights:Optional[np.ndarray]=None,
"""
y, bin_edges, yerr = histogram(x, weights=weights,
bins=bins, bin_range=bin_range,
underflow=underflow,
overflow=overflow,
normalize=normalize,
clip_weight=clip_weight,
evaluate_error=yerr,
......@@ -367,6 +396,8 @@ def get_stacked_hist_data(x:List[np.ndarray],
weights:List[Optional[np.ndarray]]=None,
bins:Union[int, Sequence]=10,
bin_range:Optional[Sequence]=None,
underflow:bool=False,
overflow:bool=False,
normalize:bool=True,
clip_weight:bool=False,
xerr:bool=True,
......@@ -380,6 +411,8 @@ def get_stacked_hist_data(x:List[np.ndarray],
bin_range = (np.min(x), np.max(x))
return get_hist_data(x=x, weights=weights,
bins=bins, bin_range=bin_range,
underflow=underflow,
overflow=overflow,
normalize=normalize,
clip_weight=clip_weight,
xerr=xerr, yerr=yerr,
......
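A usage sketch tying the new options together; the function is the one defined above, the data is illustrative:

    import numpy as np

    rng = np.random.default_rng(42)
    x = rng.normal(size=1000)
    # entries outside (-2, 2) are clipped into the edge bins instead of dropped
    y, bin_edges, yerr = histogram(x, bins=20, bin_range=(-2, 2),
                                   underflow=True, overflow=True,
                                   normalize=True, evaluate_error=True,
                                   error_option="auto")  # Poisson for unit weights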
......@@ -15,7 +15,8 @@ from quickstats.maths.numerics import safe_div
from quickstats.maths.statistics import (HistComparisonMode,
min_max_to_range, get_hist_data,
get_stacked_hist_data,
get_hist_comparison_data)
get_hist_comparison_data,
get_clipped_data)
from .core import PlotFormat, ErrorDisplayFormat
......@@ -348,6 +349,8 @@ class VariableDistributionPlot(AbstractPlot):
column_name:str, weight_name:Optional[str]=None,
bins:Union[int, Sequence]=25,
bin_range:Optional[Sequence]=None,
underflow:bool=False,
overflow:bool=False,
normalize:bool=True,
show_error:bool=False,
variable_scale:Optional[float]=None):
......@@ -367,6 +370,7 @@ class VariableDistributionPlot(AbstractPlot):
variable_scale=variable_scale,
weight_scale=weight_scale,
weight_name=weight_name)
x = get_clipped_data(x, bin_range=bin_range, clip_lower=underflow, clip_upper=overflow)
stacked_data['x'].append(x)
stacked_data['weights'].append(weights)
stacked_data['color'].append(color)
......@@ -386,6 +390,8 @@ class VariableDistributionPlot(AbstractPlot):
bin_edges = np.histogram_bin_edges(np.concatenate(stacked_data['x']).flatten(),
bins=bins, range=bin_range)
hist_data = get_stacked_hist_data(stacked_data['x'], stacked_data['weights'],
underflow=underflow,
overflow=overflow,
normalize=normalize,
bin_range=bin_range, bins=bins,
clip_weight=False,
......@@ -398,6 +404,7 @@ class VariableDistributionPlot(AbstractPlot):
targets:Optional[List[str]]=None,
xlabel:str="", ylabel:str="Fraction of Events / {bin_width:.2f}",
bins:Union[int, Sequence]=25, bin_range:Optional[Sequence]=None,
underflow:bool=False, overflow:bool=False,
normalize:bool=True, show_error:bool=False, show_error_legend:bool=False,
stacked:bool=False, xmin:Optional[float]=None, xmax:Optional[float]=None,
ymin:Optional[float]=None, ymax:Optional[float]=None, ypad:float=0.3,
......@@ -427,8 +434,14 @@ class VariableDistributionPlot(AbstractPlot):
including the rightmost edge.
bin_range: (optional) (float, float)
Range of histogram bins.
underflow: bool, default = False
Include underflow data in the first bin.
overflow: bool, default = False
Include overflow data in the last bin.
normalize: bool, default = True
Normalize the sum of histogram to unity.
Normalize the sum of weights to one. Weights outside the bin range will
not be counted if ``clip_weight`` is set to false, so the sum of bin
content could be less than one.
show_error: bool, default = False
Whether to display data error.
show_error_legend: bool, default = False
......@@ -495,6 +508,8 @@ class VariableDistributionPlot(AbstractPlot):
column_name=column_name,
weight_name=weight_name,
bins=bins, bin_range=bin_range,
underflow=underflow,
overflow=overflow,
normalize=normalize,
variable_scale=variable_scale)
label = self.config['stacked_label'].format(index=stack_index)
......@@ -511,7 +526,8 @@ class VariableDistributionPlot(AbstractPlot):
weight_scale=weight_scale,
weight_name=weight_name)
bin_edges = np.histogram_bin_edges(x, bins=bins, range=bin_range)
hist_data = get_hist_data(x, weights, normalize=normalize,
hist_data = get_hist_data(x, weights, underflow=underflow,
overflow=overflow, normalize=normalize,
bin_range=bin_range, bins=bins,
clip_weight=False,
xerr=show_error and self.config['show_xerr'],
......@@ -522,7 +538,8 @@ class VariableDistributionPlot(AbstractPlot):
if plot_format == PlotFormat.HIST:
if normalize:
weights /= weights.sum()
hist_y, _, handle = ax.hist(x, bins, range=bin_range,
x_ = get_clipped_data(x, bin_range=bin_range, clip_lower=underflow, clip_upper=overflow)
hist_y, _, handle = ax.hist(x_, bins, range=bin_range,
weights=weights, **styles)
assert np.allclose(hist_data['y'], hist_y)
_, error_handle = self.draw_binned_data(ax, hist_data,
......
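A hedged end-to-end sketch. The constructor signature and the name of the plotting method are assumptions; only the bins, bin_range, underflow, overflow, normalize and show_error options appear in this hunk:

    import numpy as np
    import pandas as pd

    # hypothetical data; "VariableDistributionPlot({label: df})" and the
    # method name "draw" are assumptions
    df = pd.DataFrame({"mass": np.random.normal(125.0, 2.0, 10000)})
    plotter = VariableDistributionPlot({"signal": df})
    plotter.draw("mass", bins=25, bin_range=(120.0, 130.0),
                 underflow=True, overflow=True,
                 normalize=True, show_error=True)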
......@@ -183,6 +183,8 @@ def set_scripts_path(scripts_path, undo=False):
os.environ["PYTHONPATH"] = scripts_path + ":" + os.environ.get("PYTHONPATH", "")
def is_valid_file(fname:str):
if not fname:
return False
if not os.path.exists(fname):
return False
ext = os.path.splitext(fname)[-1]
......
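Expected behavior of the guarded helper (the extension-specific checks after ext = ... are elided in this hunk and left out here as well):

    is_valid_file("")                # False: empty names short-circuit immediately
    is_valid_file("does_not_exist")  # False: the file must exist on disk
    # for an existing file, the remaining (elided) checks on the
    # file extension decide the result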
from typing import Union, Optional, Dict, List
from typing import Union, Optional, Dict, List, Sequence
import os
import re
import glob
......@@ -50,7 +50,7 @@ def downcast_dataframe(df):
df[fcols] = df[fcols].apply(pd.to_numeric, downcast='float')
df[icols] = df[icols].apply(pd.to_numeric, downcast='integer')
def array2root(array_data:Dict[str, np.ndarray], fname:str, treename:str,
def array2root(array_data:Dict[str, np.ndarray], filename:str, treename:str,
library:str="auto", multithread:bool=True):
if library.lower() == "auto":
library = get_default_library()
......@@ -72,14 +72,14 @@ def array2root(array_data:Dict[str, np.ndarray], fname:str, treename:str,
snapshot_templates = tuple(snapshot_templates)
import ROOT
df = ROOT.RDF.MakeNumpyDataFrame(array_data)
df.Snapshot.__getitem__(snapshot_templates)(treename, fname, columns)
df.Snapshot.__getitem__(snapshot_templates)(treename, filename, columns)
elif library == "uproot":
import uproot
from packaging import version
uproot_version = uproot.__version__
if version.parse(uproot_version) < version.parse("4.2.0"):
raise RuntimeError("uproot version too old (requires 4.2.0+)")
file = uproot.recreate(fname)
file = uproot.recreate(filename)
file[treename] = array_data
file.close()
else:
......@@ -107,11 +107,11 @@ def numpy2dataframe(array_data:Dict[str, np.ndarray]):
array2dataframe = numpy2dataframe
def dataframe2root(df:"pandas.DataFrame", fname:str, treename:str,
def dataframe2root(df:"pandas.DataFrame", filename:str, treename:str,
columns:Optional[List[str]]=None,
library:str="auto", multithread:bool=True):
array_data = dataframe2numpy(df, columns)
array2root(array_data, fname, treename, library=library,
array2root(array_data, filename, treename, library=library,
multithread=multithread)
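Usage is unchanged apart from the fname -> filename rename; a minimal sketch with the uproot backend (data and file name are illustrative):

    import numpy as np

    data = {"pt":  np.abs(np.random.normal(40.0, 10.0, 1000)),
            "eta": np.random.uniform(-2.5, 2.5, 1000)}
    # requires uproot >= 4.2.0, as enforced above
    array2root(data, filename="events.root", treename="nominal", library="uproot")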
def uproot_get_standard_columns(uproot_tree):
......@@ -145,6 +145,65 @@ def reduce_vector_types(column_types:List[str]):
reduced_column_types.append(column_type)
reduced_column_types = np.array(reduced_column_types)
return reduced_column_types
def make_iter_result(results, downcast:bool=False):
if downcast:
for result in results:
downcast_dataframe(result)
yield result
for result in results:
yield result
def iterate_uproot(files:List[str], columns:Optional[Union[str, List[str], Dict]]=None,
filter_typename=None, step_size:Union[str, int]='100 MB',
cut:Optional[str]=None, iterate:bool=False, library:str='numpy',
downcast:bool=True):
import uproot
assert library in ['numpy', 'pandas']
if columns is None:
expressions = None
aliases = None
elif isinstance(columns, str):
expressions = columns
aliases = None
elif isinstance(columns, Sequence):
expressions = list(columns)
aliases = None
elif isinstance(columns, dict):
expressions = list(columns)
aliases = {k:v for k, v in columns.items() if k != v}
else:
raise TypeError('columns must be a string, list of strings or a dictionary')
results = uproot.iterate(files, expressions=expressions,
filter_typename=filter_typename,
step_size=step_size,
aliases=aliases,
cut=cut, library=library)
if not iterate:
if library == 'numpy':
result = {}
for batch in results:
for column in batch:
if column not in result:
result[column] = batch[column]
else:
result[column] = np.concatenate([result[column], batch[column]])
return result
else:
result = None
for batch in results:
if downcast:
downcast_dataframe(batch)
if result is None:
result = batch
else:
result = pd.concat([result, batch])
return result
else:
if (library == 'pandas') and (downcast):
return make_iter_result(results, downcast=True)
return make_iter_result(results, downcast=False)
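A usage sketch of the new helper: it either accumulates all batches into a single result, or, with iterate=True, yields them one step_size-worth at a time (file and tree names are illustrative):

    files = {"sample_a.root": "nominal", "sample_b.root": "nominal"}
    # accumulate everything into a single numpy dict ...
    arrays = iterate_uproot(files, columns=["pt", "eta"], cut="pt > 25",
                            library="numpy")
    # ... or stream pandas batches of roughly step_size each, downcast on the fly
    for batch in iterate_uproot(files, columns=["pt", "eta"], step_size="100 MB",
                                iterate=True, library="pandas"):
        print(len(batch))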
def rdf2numpy(rdf, columns:Union[Dict[str, str], List[str]]=None,
cut:Optional[str]=None, convert_vectors:bool=True,
......@@ -228,6 +287,7 @@ def root2numpy(filename:Union[str, List[str]], treename:str,
columns:Union[Dict[str, str], List[str]]=None,
cut:Optional[str]=None, convert_vectors:bool=True,
mode:Union[str, int, ConversionMode]=1,
step_size:Union[str, int]='100 MB', iterate:bool=False,
library:str="auto", multithread:bool=True):
if isinstance(filename, str) and os.path.isdir(filename):
filename = glob.glob(os.path.join(filename, "*.root"))
......@@ -245,36 +305,22 @@ def root2numpy(filename:Union[str, List[str]], treename:str,
convert_vectors=convert_vectors,
mode=mode)
elif library.lower() == "uproot":
if isinstance(columns, dict):
raise RuntimeError('defining new columns is not supported when using "uproot" as the library')
import uproot
if isinstance(filename, str):
f = uproot.open(filename)
t = f[treename]
if conversion_mode == ConversionMode.REMOVE_NON_STANDARD_TYPE:
standard_columns = uproot_get_standard_columns(t)
if columns is None:
columns = standard_columns
else:
columns = [column for column in columns if column in standard_columns]
return f[treename].arrays(columns, library="numpy", cut=cut)
if not isinstance(filename, list):
filename = [filename]
# iterate over multiple files
files = {f:treename for f in filename}
if conversion_mode == ConversionMode.REMOVE_NON_STANDARD_TYPE:
filter_typename = list(uproot_datatypes)
else:
# iterate over multiple files
files = {f:treename for f in filename}
if conversion_mode == ConversionMode.REMOVE_NON_STANDARD_TYPE:
filter_typename = list(uproot_datatypes)
else:
filter_typename = None
result = {}
for batch in uproot.iterate(files, expressions=columns,
filter_typename=filter_typename,
cut=cut, library="numpy"):
for column in batch:
if column not in result:
result[column] = batch[column]
else:
result[column] = np.concatenate([result[column], batch[column]])
return result
filter_typename = None
result = iterate_uproot(files, columns=columns,
filter_typename=filter_typename,
step_size=step_size,
cut=cut, library='numpy',
iterate=iterate,
downcast=False)
return result
else:
raise RuntimeError(f'unknown library "{library}" for root data conversion')
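With the refactor, root2numpy can stream many files through iterate_uproot instead of holding everything in memory; an illustrative call with hypothetical paths:

    # stream a directory of ROOT files in ~100 MB steps
    for batch in root2numpy("ntuples/", "nominal", columns=["pt", "eta"],
                            cut="pt > 25", library="uproot",
                            step_size="100 MB", iterate=True):
        print({k: v.shape for k, v in batch.items()})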
......@@ -284,7 +330,8 @@ def root2dataframe(filename:Union[str, List[str]], treename:str,
columns:Union[Dict[str, str], List[str]]=None,
cut:Optional[str]=None,
mode:Union[str, int, ConversionMode]=1,
downcast:bool=True,
downcast:bool=True, iterate:bool=False,
step_size:Union[str, int]='100 MB',
library:str="auto", multithread:bool=True):
conversion_mode = ConversionMode.parse(mode)
if conversion_mode == ConversionMode.REMOVE_NON_ARRAY_TYPE:
......@@ -298,38 +345,25 @@ def root2dataframe(filename:Union[str, List[str]], treename:str,
library=library,
multithread=multithread)
result = numpy2dataframe(numpy_data)
if downcast:
downcast_dataframe(result)
elif library.lower() == "uproot":
if isinstance(columns, dict):
raise RuntimeError('defining new columns is not supported when using "uproot" as the library')
import uproot
if isinstance(filename, str):
f = uproot.open(filename)
t = f[treename]
if conversion_mode == ConversionMode.REMOVE_NON_STANDARD_TYPE:
standard_columns = uproot_get_standard_columns(t)
if columns is None:
columns = standard_columns
else:
columns = [column for column in columns if column in standard_columns]
result = f[treename].arrays(columns, library="pandas")
if not isinstance(filename, list):
filename = [filename]
import pandas as pd
# iterate over multiple files
files = {f:treename for f in filename}
if conversion_mode == ConversionMode.REMOVE_NON_STANDARD_TYPE:
filter_typename = list(uproot_datatypes)
else:
import pandas as pd
# iterate over multiple files
files = {f:treename for f in filename}
if conversion_mode == ConversionMode.REMOVE_NON_STANDARD_TYPE:
filter_typename = list(uproot_datatypes)
else:
filter_typename = None
result = None
for batch in uproot.iterate(files, expressions=columns,
filter_typename=filter_typename,
cut=cut, library="pandas"):
if result is None:
result = batch
else:
result = pd.concat([result, batch])
if downcast:
downcast_dataframe(result)
filter_typename = None
result = iterate_uproot(files, columns=columns,
filter_typename=filter_typename,
step_size=step_size,
cut=cut, library='pandas',
iterate=iterate,
downcast=False)
return result
def root2rdataset(filename:Union[str, List[str], "quickstats.PathManager"], treename:str,
......