diff --git a/datascout/_datascout.py b/datascout/_datascout.py index cbcadec791f7f77fba50c9d0b58ef5803c5a9aa6..173fe30b174f8994ca4a4572036a8ad68d6688a0 100644 --- a/datascout/_datascout.py +++ b/datascout/_datascout.py @@ -20,6 +20,7 @@ from pathlib import Path # Functions needed to split 2D arrays """ + def _split_2D_array(val, in_memory=False, split_to_list=False, verbose=False): """It converts numpy 2D arrays into either 1D arrays or list of 1D arrays @@ -104,6 +105,7 @@ def _convert_dict_list(data, in_memory=False, split_to_list=False, verbose=False # Functions needed to re-merge 1D arrays of 1D arrays into 2D arrays """ + def _merge_to_2D(val, string_as_obj=False, verbose=False): """ _merge_to_2D(val, string_as_obj=False, verbose=False) @@ -181,6 +183,8 @@ def _revert_dict_list(data, in_memory=False, string_as_obj=False, verbose=False) # CORE function of this project: it allows to convert a pyarrow object into a dict # """ + + def _convert_parrow_data( data, treat_str_arrays_as_str=True, use_list_for_2D_array=False ): @@ -195,7 +199,7 @@ def _convert_parrow_data( if use_list_for_2D_array (default=False) it will try to use lists of 1D arrays instead of 2D arrays Typically the output should be a `dict`. If, however, one is trying to convert more complex structures - like a pyarrow Table or StructArray, the output will be a list of dictionaries if more than one data + like a pyarrow Table or StructArray, the output will be a list of dictionaries if more than one data records are found. """ if isinstance(data, pa.lib.Table): @@ -282,10 +286,11 @@ def _convert_parrow_data( """ -###### +###### # Some important functions not so interesting for the standard user, but fundamental """ + def dict_to_pyarrow(input_dict): my_data_dict_converted = _convert_dict_list( input_dict, in_memory=False, split_to_list=False, verbose=False @@ -310,10 +315,12 @@ def pyarrow_to_dict(input_pa): def pyarrow_to_pandas(input_pa): return dict_to_pandas(pyarrow_to_dict(input_pa)) + """ ####### The functions interesting for the user are the following ones: """ + def dict_to_pandas(input_dict): if not isinstance(input_dict, list): input_dict = [input_dict] @@ -406,10 +413,12 @@ def parquet_to_pandas(filename): def parquet_to_awkward(filename): return ak.from_parquet(filename) + """ ####### Simple save/load functions for the user """ + def save_dict(dictData, folderPath=None, filename=None, fileFormat="parquet"): if filename is None: filename = datetime.now().strftime("%Y.%m.%d.%H.%M.%S.%f") @@ -442,10 +451,12 @@ def load_dict(filename, fileFormat="parquet"): else: raise ValueError("Unknown file format ({})".format(fileFormat)) + """ ####### Some additional functions for debugging purposes """ + def _find_lists(data, verbose=False): """ Look inside data (assumed to be a dict) and tell if some fields are actually lists. diff --git a/docs/source/conf.py b/docs/source/conf.py index 3ef26323853d283b4658ae7fc4a2bda34509c354..00d33f16abdc017bcf019ec66bcff5e26b12f7a0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -16,11 +16,11 @@ copyright = "{0}, CERN".format(datetime.datetime.now().year) # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'acc_py_sphinx.theme', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.napoleon', + "acc_py_sphinx.theme", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.napoleon", ]