diff --git a/datascout/_datascout.py b/datascout/_datascout.py index 4df2be1086c34473a2d6004c1312f31a5a0a79d9..cbcadec791f7f77fba50c9d0b58ef5803c5a9aa6 100644 --- a/datascout/_datascout.py +++ b/datascout/_datascout.py @@ -15,9 +15,10 @@ import scipy from pathlib import Path +""" ###### # Functions needed to split 2D arrays - +""" def _split_2D_array(val, in_memory=False, split_to_list=False, verbose=False): """It converts numpy 2D arrays into either 1D arrays or list of 1D arrays @@ -71,7 +72,7 @@ def _convert_dict_list(data, in_memory=False, split_to_list=False, verbose=False It is up to the user to make a deepcopy, if desired, of the data before and/or after conversion. """ - if in_memory == False: + if in_memory is False: data = copy.copy(data) if type(data) == list: for i in range(len(data)): @@ -98,9 +99,10 @@ def _convert_dict_list(data, in_memory=False, split_to_list=False, verbose=False return data +""" ###### # Functions needed to re-merge 1D arrays of 1D arrays into 2D arrays - +""" def _merge_to_2D(val, string_as_obj=False, verbose=False): """ @@ -147,7 +149,7 @@ def _revert_dict_list(data, in_memory=False, string_as_obj=False, verbose=False) """ - if in_memory == False: + if in_memory is False: data = copy.copy(data) if type(data) == list: for entry in data: @@ -174,9 +176,11 @@ def _revert_dict_list(data, in_memory=False, string_as_obj=False, verbose=False) return data -###### +""" +# # CORE function of this project: it allows to convert a pyarrow object into a dict # +""" def _convert_parrow_data( data, treat_str_arrays_as_str=True, use_list_for_2D_array=False ): @@ -191,7 +195,8 @@ def _convert_parrow_data( if use_list_for_2D_array (default=False) it will try to use lists of 1D arrays instead of 2D arrays Typically the output should be a `dict`. If, however, one is trying to convert more complex structures - like a pyarrow Table or StructArray, the output will be a list of dictionaries, if more than one data records are found. + like a pyarrow Table or StructArray, the output will be a list of dictionaries if more than one data + records are found. """ if isinstance(data, pa.lib.Table): output = [] @@ -276,8 +281,10 @@ def _convert_parrow_data( return data -###### Some important functions not so interesting for the standard user, but fundamental - +""" +###### +# Some important functions not so interesting for the standard user, but fundamental +""" def dict_to_pyarrow(input_dict): my_data_dict_converted = _convert_dict_list( @@ -303,9 +310,9 @@ def pyarrow_to_dict(input_pa): def pyarrow_to_pandas(input_pa): return dict_to_pandas(pyarrow_to_dict(input_pa)) - -####### The functions interesting for the user - +""" +####### The functions interesting for the user are the following ones: +""" def dict_to_pandas(input_dict): if not isinstance(input_dict, list): @@ -399,11 +406,12 @@ def parquet_to_pandas(filename): def parquet_to_awkward(filename): return ak.from_parquet(filename) - +""" ####### Simple save/load functions for the user +""" def save_dict(dictData, folderPath=None, filename=None, fileFormat="parquet"): - if filename == None: + if filename is None: filename = datetime.now().strftime("%Y.%m.%d.%H.%M.%S.%f") Path(folderPath).mkdir(parents=True, exist_ok=True) filename = os.path.join(folderPath, filename) @@ -434,9 +442,9 @@ def load_dict(filename, fileFormat="parquet"): else: raise ValueError("Unknown file format ({})".format(fileFormat)) - +""" ####### Some additional functions for debugging purposes - +""" def _find_lists(data, verbose=False): """