Skip to content
Snippets Groups Projects

Dev

Merged Alkaid Cheng requested to merge dev into master
2 files
+ 25
11
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -40,7 +40,7 @@ def downcast_dataframe(df):
df[fcols] = df[fcols].apply(pd.to_numeric, downcast='float')
df[icols] = df[icols].apply(pd.to_numeric, downcast='integer')
def array2root(array_data:Dict[str, np.ndarray], fname:str, tree_name:str,
def array2root(array_data:Dict[str, np.ndarray], fname:str, treename:str,
library:str="auto", multithread:bool=True):
if library.lower() == "auto":
library = get_default_library()
@@ -62,15 +62,15 @@ def array2root(array_data:Dict[str, np.ndarray], fname:str, tree_name:str,
snapshot_templates = tuple(snapshot_templates)
import ROOT
df = ROOT.RDF.MakeNumpyDataFrame(array_data)
df.Snapshot.__getitem__(snapshot_templates)(tree_name, fname, columns)
df.Snapshot.__getitem__(snapshot_templates)(treename, fname, columns)
elif library == "uproot":
import uproot
from packaging import version
uproot_version = uproot.__version__
if version.parse(uproot_version) < version.parse("4.2.0"):
raise RuntimeError("uproot version too old (<4.2.0)")
raise RuntimeError("uproot version too old (requires 4.2.0+)")
file = uproot.recreate(fname)
file[tree_name] = array_data
file[treename] = array_data
file.close()
else:
raise RuntimeError(f'unknown library "{library}" for root data conversion')
@@ -86,12 +86,22 @@ def dataframe2numpy(df:"pandas.DataFrame", columns:Optional[List[str]]=None):
arrays[column] = arrays[column].astype(df[column].dtype)
return arrays
def numpy2dataframe(array_data:Dict[str, np.ndarray]):
    """Convert a mapping of column name -> numpy array into a pandas DataFrame.

    Arrays with more than one dimension (and a non-object dtype) are turned
    into a list of their first-axis slices before being handed to pandas, so
    that each row of the resulting column holds one sub-array. All other
    arrays are passed through unchanged. The input mapping itself is not
    modified.

    Parameters
    ----------
    array_data : dict of str -> np.ndarray
        Column name to column data.

    Returns
    -------
    pandas.DataFrame
        DataFrame with one column per key, in the mapping's iteration order.
    """
    def _as_column(values):
        # Split non-object multi-dimensional arrays along their first axis.
        needs_split = (values.ndim > 1) and (values.dtype != object)
        return list(values) if needs_split else values

    column_data = {name: _as_column(values) for name, values in array_data.items()}
    import pandas as pd
    return pd.DataFrame(column_data)
# Alias: `array2dataframe` is bound to the same function object as `numpy2dataframe`.
array2dataframe = numpy2dataframe
def dataframe2root(df:"pandas.DataFrame", fname:str, tree_name:str,
def dataframe2root(df:"pandas.DataFrame", fname:str, treename:str,
columns:Optional[List[str]]=None,
library:str="auto", multithread:bool=True):
array_data = dataframe2numpy(df, columns)
array2root(array_data, fname, tree_name, library=library,
array2root(array_data, fname, treename, library=library,
multithread=multithread)
def uproot_get_standard_columns(uproot_tree):
@@ -155,7 +165,12 @@ def rdf2numpy(rdf, columns:Union[Dict[str, str], List[str]]=None,
result = rdf.AsNumpy(save_columns)
for vector_column in vector_columns:
# not the most efficient way, but easiest
result[vector_column] = np.array([np.array(v.data()) for v in result[vector_column]], dtype=object)
numpy_array = np.array([np.array(v.data()) for v in result[vector_column]], dtype=object)
# in case it's an array of regular size
if len(numpy_array) and (numpy_array[0].dtype == object):
result[vector_column] = np.array([np.array(v.data()) for v in result[vector_column]])
else:
result[vector_column] = numpy_array
for old_column, new_column in rename_columns.items():
result[new_column] = result.pop(old_column)
# reorder the columns to match the order given by the user
@@ -210,7 +225,7 @@ def root2numpy(filename:Union[str, List[str]], treename:str,
else:
raise RuntimeError(f'unknown library "{library}" for root data conversion')
root2array = root2numpy
root2array = root2numpy
def root2dataframe(filename:Union[str, List[str]], treename:str,
columns:Union[Dict[str, str], List[str]]=None,
@@ -225,8 +240,7 @@ def root2dataframe(filename:Union[str, List[str]], treename:str,
convert_vectors=True,
remove_non_standard_types=remove_non_standard_types,
library=library)
import pandas as pd
result = pd.DataFrame(numpy_data)
result = numpy2dataframe(numpy_data)
elif library.lower() == "uproot":
if isinstance(columns, dict):
raise RuntimeError('defining new columns are not supported when using "uproot" as the library')
Loading