From 536ed145d36fadc76d092877bfefb07260152c51 Mon Sep 17 00:00:00 2001
From: Davide Gamba <davide.gamba@cern.ch>
Date: Mon, 24 May 2021 18:06:19 +0200
Subject: [PATCH] Apply black code formatting

---
 datascout/__init__.py                  |   4 +-
 datascout/_datascout.py                | 296 +++++++++++++++----------
 datascout/tests/test_dataconversion.py | 245 ++++++++++++++------
 examples/000_example.py                |  95 ++++----
 setup.py                               |  52 ++---
 5 files changed, 439 insertions(+), 253 deletions(-)

diff --git a/datascout/__init__.py b/datascout/__init__.py
index a5771ed..442097a 100644
--- a/datascout/__init__.py
+++ b/datascout/__init__.py
@@ -6,8 +6,8 @@ list of sweet functions for data conversion and writing to disk
 __version__ = "0.0.1.beta0"
 
 
-# to look at pyarrow, typically not used by a user, 
-# but key functions for this package 
+# to look at pyarrow, typically not used by a user,
+# but key functions for this package
 from ._datascout import dict_to_pyarrow
 from ._datascout import pyarrow_to_parquet
 from ._datascout import parquet_to_pyarrow
diff --git a/datascout/_datascout.py b/datascout/_datascout.py
index cfe39ec..af28cae 100644
--- a/datascout/_datascout.py
+++ b/datascout/_datascout.py
@@ -18,57 +18,59 @@ from pathlib import Path
 ######
 # Functions needed to split 2D arrays
 
-def _split_2D_array(val, in_memory=False, split_to_list=False, verbose=False):
-    '''
-    _split_2D_array(val, in_memory=False, split_to_list=False, verbose=False)
 
-    It converts numpy 2D arrays into either:
-    - 1D "object" arrays containing 1D val.dtype arrays (split_to_list=False)
-    - list of 1D val.dtype arrays (split_to_list=True)
-    by default, split_to_list=False
+def _split_2D_array(val, in_memory=False, split_to_list=False, verbose=False):
+    """It converts numpy 2D arrays into either 1D arrays or list of 1D arrays
 
-    It returns the split value or the original value if the input was not
+    Args:
+        val (numpy.ndarray): the array to convert
+        in_memory (bool): if True, the data is not copied but just represented in a different form (default=False)
+        split_to_list (bool): if True, the data is split into a 1D list instead of a 1D object array (default=False)
+        verbose (bool): print some information when data is split (default=False)
 
-    If in_memory == True (default=False), data is not copied but just represented in a different form
-    '''
+    Returns:
+        the split value or the original value if the input was not of the right type or did not need to be split.
+    """
     if (type(val) == np.ndarray) and len(np.shape(val)) == 2:
         if not in_memory:
             val = copy.deepcopy(val)
-            if verbose: print('made a copy of '+str(val))
+            if verbose:
+                print("made a copy of " + str(val))
         if split_to_list:
             newVal = list(val)
         else:
             # (TODO: probably to be done better!!!)
             auxDim = np.shape(val)[0]
             # split val, without making data copy
-            auxData = np.split(np.ravel(val), auxDim) 
-            # put it in object array 
-            newVal = np.empty((auxDim,), dtype=object) 
+            auxData = np.split(np.ravel(val), auxDim)
+            # put it in object array
+            newVal = np.empty((auxDim,), dtype=object)
             for i in range(auxDim):
                 newVal[i] = auxData[i]
         if verbose:
-            print('     -----    ')
-            print(str(val)+' ('+str(type(val))+')')
-            print(' -> converted to -> ')
-            print(str(newVal)+' ('+str(type(newVal))+')')
+            print("     -----    ")
+            print(str(val) + " (" + str(type(val)) + ")")
+            print(" -> converted to -> ")
+            print(str(newVal) + " (" + str(type(newVal)) + ")")
         return newVal
     else:
         return val
 
+
 def _convert_dict_list(data, in_memory=False, split_to_list=False, verbose=False):
-    '''
+    """
     Parse the input data, which should be a list or a dict, and convert all 2D arrays into either
     - 1D object array of 1D arrays
     - 1D list of 1D arrays
 
-    If in_memory=True (default False), it changes the data in memory. 
+    If in_memory=True (default False), it changes the data in memory.
     In any case, the modified data is returned.
-    
+
     NOTE: The conversion is done so as to minimize data copies: i.e. the actual data
     is not copied (or copies are reduced to the minimum).
     It is up to the user to make a deepcopy, if desired, of the data before and/or after conversion.
-    
-    '''
+
+    """
     if in_memory == False:
         data = copy.copy(data)
     if type(data) == list:
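
For orientation, a minimal sketch of the split performed by _split_2D_array above
(plain numpy; variable names are illustrative only): a (2, 3) array becomes a 1D
"object" array holding its two rows, without copying the underlying buffer.

    import numpy as np

    val = np.arange(6, dtype=np.int16).reshape(2, 3)
    aux_data = np.split(np.ravel(val), 2)   # two 1D views over the same buffer
    new_val = np.empty((2,), dtype=object)  # the 1D "object" wrapper
    for i, row in enumerate(aux_data):
        new_val[i] = row
    # new_val[0] -> array([0, 1, 2], dtype=int16); list(val) would give the
    # split_to_list=True variant instead.
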
@@ -76,62 +78,74 @@ def _convert_dict_list(data, in_memory=False, split_to_list=False, verbose=False
             if type(data[i]) == list or type(data[i]) == dict:
                 data[i] = _convert_dict_list(data[i])
             elif type(data[i]) == np.ndarray:
-                data[i] = _split_2D_array(data[i], in_memory=in_memory, split_to_list=split_to_list, verbose=verbose)
+                data[i] = _split_2D_array(
+                    data[i],
+                    in_memory=in_memory,
+                    split_to_list=split_to_list,
+                    verbose=verbose,
+                )
     elif type(data) == dict:
         for key in data.keys():
             if type(data[key]) == list or type(data[key]) == dict:
                 data[key] = _convert_dict_list(data[key])
             elif type(data[key]) == np.ndarray:
-                data[key] = _split_2D_array(data[key], in_memory=in_memory, split_to_list=split_to_list, verbose=verbose)
+                data[key] = _split_2D_array(
+                    data[key],
+                    in_memory=in_memory,
+                    split_to_list=split_to_list,
+                    verbose=verbose,
+                )
     return data
 
 
 ######
 # Functions needed to re-merge 1D arrays of 1D arrays into 2D arrays
 
+
 def _merge_to_2D(val, string_as_obj=False, verbose=False):
-    '''
+    """
     _merge_to_2D(val, string_as_obj=False, verbose=False)
 
     It converts back numpy arrays of "object" dtype into 2D arrays.
-    By construction, if conversion actually occurs, this operation makes a copy of 
+    By construction, if conversion actually occurs, this operation makes a copy of
     the data (probably with some exceptions)
-    
-    string_as_obj=False (default): 
-    This options (if enabled) makes sure that the returned object is a 2D array of "object" 
+
+    string_as_obj=False (default):
+    This option (if enabled) makes sure that the returned object is a 2D array of "object"
     data type in case of string arrays. This is necessary in case you want to edit one string
     of the array without it being cut...
-    '''
+    """
     if ((type(val) == np.ndarray) and val.dtype == object) or (type(val) == list):
         newVal = np.stack(val)
         # fix subtle issue with strings which I am assuming are arrays of objects
         if string_as_obj and (newVal.dtype.type is np.str_):
             newVal = newVal.astype(object)
         if verbose:
-            print('     -----    ')
-            print(str(val)+' ('+str(type(val))+')')
-            print(' -> reverted to -> ')
-            print(str(newVal)+' ('+str(newVal.dtype)+')')
+            print("     -----    ")
+            print(str(val) + " (" + str(type(val)) + ")")
+            print(" -> reverted to -> ")
+            print(str(newVal) + " (" + str(newVal.dtype) + ")")
         return newVal
     else:
         return val
 
+
 def _revert_dict_list(data, in_memory=False, string_as_obj=False, verbose=False):
-    '''
+    """
     Parse the input data, which should be a list or a dict, and convert all 1D arrays of "object" type
-    into 2D arrays of the proper data type. 
+    into 2D arrays of the proper data type.
 
     If string_as_obj=True (default=False), the obtained 2D arrays of strings are converted into 2D arrays
     of "object" dtype.
 
-    If in_memory=True (default False), it changes the data in memory. 
+    If in_memory=True (default False), it changes the data in memory.
     In any case, the modified data is returned.
-    
+
     NOTE: The conversion is done so as to minimize data copies: i.e. the actual data
     is not copied (or copies are reduced to the minimum).
     It is up to the user to make a deepcopy, if desired, of the data before and/or after conversion.
-    
-    '''
+
+    """
 
     if in_memory == False:
         data = copy.copy(data)
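
The reverse direction, as _merge_to_2D does it with np.stack (a sketch; the
string_as_obj branch is mimicked here with .astype(object)):

    import numpy as np

    obj = np.empty((2,), dtype=object)
    obj[0] = np.array([0, 1, 2], dtype=np.int16)
    obj[1] = np.array([3, 4, 5], dtype=np.int16)
    merged = np.stack(obj)  # back to a (2, 3) int16 array (this copies the data)
    # for string arrays, string_as_obj=True corresponds to:
    strings = np.stack([np.array(["a"]), np.array(["bb"])]).astype(object)
    # elements can now be replaced by longer strings without truncation
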
@@ -140,7 +154,9 @@ def _revert_dict_list(data, in_memory=False, string_as_obj=False, verbose=False)
             if type(entry) == dict:
                 _revert_dict_list(entry)
             elif type(entry) == list or type(entry) == np.ndarray:
-                entry = _merge_to_2D(entry, string_as_obj=string_as_obj, verbose=verbose)
+                entry = _merge_to_2D(
+                    entry, string_as_obj=string_as_obj, verbose=verbose
+                )
                 if len(entry) > 0 and isinstance(entry.flatten()[0], dict):
                     for nested_data in entry.flatten():
                         _revert_dict_list(nested_data)
@@ -149,20 +165,25 @@ def _revert_dict_list(data, in_memory=False, string_as_obj=False, verbose=False)
             if type(data[key]) == dict:
                 _revert_dict_list(data[key])
             elif type(data[key]) == list or type(data[key]) == np.ndarray:
-                data[key] = _merge_to_2D(data[key], string_as_obj=string_as_obj, verbose=verbose)
+                data[key] = _merge_to_2D(
+                    data[key], string_as_obj=string_as_obj, verbose=verbose
+                )
                 if len(data[key]) > 0 and isinstance(data[key].flatten()[0], dict):
                     for nested_data in data[key].flatten():
                         _revert_dict_list(nested_data)
     return data
 
+
 ######
 # CORE function of this project: it allows to convert a pyarrow object into a dict
 #
-def _convert_parrow_data(data, treat_str_arrays_as_str=True, use_list_for_2D_array=False):
-    '''
+def _convert_parrow_data(
+    data, treat_str_arrays_as_str=True, use_list_for_2D_array=False
+):
+    """
     _convert_parrow_data(data)
 
-    it extract data from a pyarrow object to a "standard" pyjapcscout-like dict dataset, 
+    It extracts data from a pyarrow object into a "standard" pyjapcscout-like dict dataset,
     i.e. a dictionary containing only non-null numpy objects/arrays and no lists (unless you enable use_list_for_2D_array)
 
     If treat_str_arrays_as_str (default=True), it will try to preserve the str data type also for arrays
@@ -171,18 +192,20 @@ def _convert_parrow_data(data, treat_str_arrays_as_str=True, use_list_for_2D_arr
 
     Typically the output should be a `dict`. If, however, one is trying to convert more complex structures
     like a pyarrow Table or StructArray, the output will be a list of dictionaries if more than one data record is found.
-    '''
+    """
     if isinstance(data, pa.lib.Table):
         output = []
         for irow in range(data.num_rows):
             outputRow = dict()
             for column in data.column_names:
-                #if len(data['device1']) -> probably to be done something like this...
+                # if len(data['device1']) -> probably to be done something like this...
                 device_dict = dict()
                 # those should be value, header, exception
                 for item in data[column][irow].items():
                     # this can be iterated... I think
-                    device_dict[item[0]] = _convert_parrow_data(item[1], treat_str_arrays_as_str, use_list_for_2D_array)
+                    device_dict[item[0]] = _convert_parrow_data(
+                        item[1], treat_str_arrays_as_str, use_list_for_2D_array
+                    )
                 outputRow[column] = device_dict
             output.append(outputRow)
         if len(output) == 1:
@@ -192,7 +215,11 @@ def _convert_parrow_data(data, treat_str_arrays_as_str=True, use_list_for_2D_arr
     elif isinstance(data, pa.StructArray):
         output = []
         for row in data:
-            output.append(_convert_parrow_data(row, treat_str_arrays_as_str, use_list_for_2D_array))
+            output.append(
+                _convert_parrow_data(
+                    row, treat_str_arrays_as_str, use_list_for_2D_array
+                )
+            )
         if len(output) == 1:
             return output[0]
         else:
@@ -200,12 +227,16 @@ def _convert_parrow_data(data, treat_str_arrays_as_str=True, use_list_for_2D_arr
     elif isinstance(data, pa.StructScalar):
         output = dict()
         for item in data.items():
-            output[item[0]] = _convert_parrow_data(item[1], treat_str_arrays_as_str, use_list_for_2D_array)
+            output[item[0]] = _convert_parrow_data(
+                item[1], treat_str_arrays_as_str, use_list_for_2D_array
+            )
         return output
     elif isinstance(data, pa.ListScalar):
         if isinstance(data.type.value_type, pa.lib.ListType):
-            aux_dtype= data.type.value_type.value_type.to_pandas_dtype()
-            if treat_str_arrays_as_str and data.type.value_type.value_type.equals(pa.string()):
+            aux_dtype = data.type.value_type.value_type.to_pandas_dtype()
+            if treat_str_arrays_as_str and data.type.value_type.value_type.equals(
+                pa.string()
+            ):
                 # actually a string! not a generic object....
                 aux_dtype = np.str_
             return np.array(data.as_py(), dtype=aux_dtype)
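
In practice _convert_parrow_data is exercised through the public wrappers defined
further down in this file; a hedged round-trip sketch (assuming the package is
importable as datascout, and mirroring the 'device2' case in the tests):

    import numpy as np
    import datascout

    data = {"device": {"value": np.array([[1, 2], [4, 5]], dtype=np.int16),
                       "header": {"acqStamp": np.int64(1)},
                       "exception": ""}}
    table = datascout.dict_to_pyarrow(data)  # 2D arrays are split into rows first
    back = datascout.pyarrow_to_dict(table)  # the ListScalar branch re-merges them
    # back["device"]["value"] is again a (2, 2) int16 array; the tests assert this.
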
@@ -214,12 +245,18 @@ def _convert_parrow_data(data, treat_str_arrays_as_str=True, use_list_for_2D_arr
                 if use_list_for_2D_array:
                     output = []
                     for auxValue in data.values:
-                        output.append(_convert_parrow_data(auxValue), treat_str_arrays_as_str, use_list_for_2D_array)
+                        output.append(
+                            _convert_parrow_data(
+                                auxValue, treat_str_arrays_as_str, use_list_for_2D_array
+                            )
+                        )
                     return output
                 else:
                     output = np.empty((len(data.values),), dtype=object)
                     for i, auxValue in enumerate(data.values):
-                        output[i] = _convert_parrow_data(auxValue, treat_str_arrays_as_str, use_list_for_2D_array)
+                        output[i] = _convert_parrow_data(
+                            auxValue, treat_str_arrays_as_str, use_list_for_2D_array
+                        )
                     return output
             else:
                 # could be a 1D array of some data type
@@ -229,189 +266,218 @@ def _convert_parrow_data(data, treat_str_arrays_as_str=True, use_list_for_2D_arr
                     aux_dtype = np.str_
                 return np.array(data.as_py(), dtype=aux_dtype)
         else:
-            print('Zzzuth...')
+            print("Zzzuth...")
             return data
     elif issubclass(type(data), pa.lib.Scalar):
         # horrible casting!... did not find a better way....
         return data.type.to_pandas_dtype()(data.as_py())
     else:
-        print('Sigh... unknown data type: '+str(type(data)))
+        print("Sigh... unknown data type: " + str(type(data)))
         return data
 
 
 ###### Some important functions not so interesting for the standard user, but fundamental
 
+
 def dict_to_pyarrow(input_dict):
-    my_data_dict_converted = _convert_dict_list(input_dict, in_memory=False, split_to_list=False, verbose=False)
+    my_data_dict_converted = _convert_dict_list(
+        input_dict, in_memory=False, split_to_list=False, verbose=False
+    )
     if not isinstance(my_data_dict_converted, list):
         my_data_dict_converted = [my_data_dict_converted]
     return pa.Table.from_pandas(pd.DataFrame(my_data_dict_converted))
 
+
 def pyarrow_to_parquet(input_pa, filename):
     pq.write_table(input_pa, filename)
 
+
 def parquet_to_pyarrow(filename):
     return pq.read_table(filename)
 
+
 def pyarrow_to_dict(input_pa):
     return _convert_parrow_data(input_pa)
-     
+
 
 def pyarrow_to_pandas(input_pa):
     return dict_to_pandas(pyarrow_to_dict(input_pa))
 
+
 ####### The functions interesting for the user
 
+
 def dict_to_pandas(input_dict):
     if not isinstance(input_dict, list):
         input_dict = [input_dict]
     return pd.DataFrame(input_dict)
 
+
 def dict_to_awkward(input_dict):
     return ak.from_arrow(dict_to_pyarrow(input_dict))
 
+
 def dict_to_parquet(input_dict, filename):
-    # we could also just go to pandas, and then to parquet. 
+    # we could also just go to pandas, and then to parquet.
     #   dict_to_pandas(input_dict).to_parquet(filename)
     name, ext = os.path.splitext(filename)
     if len(ext) == 0:
-        filename = filename+'.parquet'
+        filename = filename + ".parquet"
     pyarrow_to_parquet(dict_to_pyarrow(input_dict), filename)
 
+
 def dict_to_pickle(input_dict, filename):
     name, ext = os.path.splitext(filename)
     if len(ext) == 0:
-        filename = filename+'.pkl'
-    with open(filename, 'wb') as handle:
+        filename = filename + ".pkl"
+    with open(filename, "wb") as handle:
         pickle.dump(input_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
 
+
 def dict_to_json(input_dict, filename):
-    '''
+    """
     Function provided for convenience, but not of interest for typical use case...
-    '''
+    """
     name, ext = os.path.splitext(filename)
     if len(ext) == 0:
-        filename = filename+'.json'
+        filename = filename + ".json"
     dict_to_pandas(input_dict).to_json(filename)
 
+
 def json_to_pandas(filename):
-    '''
+    """
     Function provided for convenience, but not of interest for typical use case...
-    '''
+    """
     return pd.read_json(filename)
 
+
 def pandas_to_dict(input_pandas):
-    '''
+    """
     it converts a pandas dataframe into a pyjapcscout-like dict
      (or list of dicts in case of many records)
-    '''
-    output = input_pandas.to_dict('records')
+    """
+    output = input_pandas.to_dict("records")
     if len(output) == 1:
         output = output[0]
     return output
 
+
 def awkward_to_dict(input_awkward):
-    '''
+    """
     it converts an awkward array (or one row of it) into a pyjapcscout-like dict
-    '''
+    """
     return _convert_parrow_data(ak.to_arrow(input_awkward))
 
+
 def pickle_to_dict(filename):
-    with open(filename, 'rb') as handle:
+    with open(filename, "rb") as handle:
         load_dict = pickle.load(handle)
     return load_dict
 
+
 def parquet_to_dict(filename):
     return pyarrow_to_dict(parquet_to_pyarrow(filename))
 
+
 # between pandas and awkward
 def pandas_to_awkward(input_pandas):
     return dict_to_awkward(pandas_to_dict(input_pandas))
 
+
 def awkward_to_pandas(input_awkward):
     return dict_to_pandas(awkward_to_dict(input_awkward))
 
+
 # reading from parquet to pandas without type loss
 def parquet_to_pandas(filename):
-    '''
+    """
     It reads a **single** parquet into a pandas dataframe with no data type loss
-    '''
+    """
     return dict_to_pandas(parquet_to_dict(filename))
 
+
 def parquet_to_awkward(filename):
     return ak.from_parquet(filename)
 
 
 ####### Simple save/load functions for the user
 
+
 def _getFilename():
     return datetime.now().strftime("%Y.%m.%d.%H.%M.%S.%f")
 
-def save_dict(dictData, folderPath = None, filename = None, fileFormat='parquet'):
+
+def save_dict(dictData, folderPath=None, filename=None, fileFormat="parquet"):
     if filename == None:
         filename = _getFilename()
     Path(folderPath).mkdir(parents=True, exist_ok=True)
     filename = os.path.join(folderPath, filename)
-    if fileFormat == 'parquet':
-        dict_to_parquet(dictData, filename+'.parquet')
-    elif fileFormat == 'json':
-        dict_to_json(dictData, filename+'.json')
-    elif (fileFormat == 'pickle') or (fileFormat == 'pickledict'):
-        dict_to_pickle(dictData, filename+'.pkl')
-    elif fileFormat == 'mat':
-        raise ValueError('MAT format not yet supported')
-        scipy.io.savemat(filename+'.mat', dictData)
+    if fileFormat == "parquet":
+        dict_to_parquet(dictData, filename + ".parquet")
+    elif fileFormat == "json":
+        dict_to_json(dictData, filename + ".json")
+    elif (fileFormat == "pickle") or (fileFormat == "pickledict"):
+        dict_to_pickle(dictData, filename + ".pkl")
+    elif fileFormat == "mat":
+        raise ValueError("MAT format not yet supported")
+        scipy.io.savemat(filename + ".mat", dictData)
     else:
-        raise ValueError('Unknown file format')
+        raise ValueError("Unknown file format")
+
 
-def load_dict(filename, fileFormat='parquet'):
-    if fileFormat == 'parquet':
+def load_dict(filename, fileFormat="parquet"):
+    if fileFormat == "parquet":
         return parquet_to_dict(filename)
-    elif (fileFormat == 'pickle') or (fileFormat == 'pickledict'):
+    elif (fileFormat == "pickle") or (fileFormat == "pickledict"):
         return pickle_to_dict(filename)
-    elif fileFormat == 'json':
-        raise ValueError('JSON format not yet supported')
-    elif fileFormat == 'mat':
-        raise ValueError('MAT format not yet supported')
-        print('TODO: compatibility with MATLAB generated files?!')
+    elif fileFormat == "json":
+        raise ValueError("JSON format not yet supported")
+    elif fileFormat == "mat":
+        raise ValueError("MAT format not yet supported")
+        print("TODO: compatibility with MATLAB generated files?!")
         return scipy.io.loadmat(filename)
     else:
-        raise ValueError('Unknown file format ({})'.format(fileFormat))
+        raise ValueError("Unknown file format ({})".format(fileFormat))
 
 
 ####### Some additional functions for debugging purposes
 
-def _find_lists(data, verbose = False):
-    '''
+
+def _find_lists(data, verbose=False):
+    """
     Look inside data (assumed to be a dict) and tell if some fields are actually lists.
     In theory, the `datascout` package is meant to be used only on dicts that do NOT contain any list!
-    '''
+    """
     for key, value in data.items():
-        if verbose: print(key)
+        if verbose:
+            print(key)
         if isinstance(value, list):
-            print(key+" is a list!")
+            print(key + " is a list!")
         elif isinstance(value, dict):
             _find_lists(value)
         else:
-            if verbose: print(" ..is "+str(type(value)))
+            if verbose:
+                print(" ..is " + str(type(value)))
 
 
-
-def _compare_data(data1, data2, use_assert = False):
-    '''
+def _compare_data(data1, data2, use_assert=False):
+    """
     Compares two dictionaries or lists and show the differences (of type or data type).
     For a full comparison, it is sometimes best to call this function also with the arguments inverted.
-    '''
+    """
+
     def not_equal(a, b):
-        print('   ------   ')
-        print(str(a) + ' (' + str(type(a)) + ')')
-        print('   NOT EQUAL   ')
-        print(str(b) + ' (' + str(type(b)) + ')')
-        print('   ------   ')
+        print("   ------   ")
+        print(str(a) + " (" + str(type(a)) + ")")
+        print("   NOT EQUAL   ")
+        print(str(b) + " (" + str(type(b)) + ")")
+        print("   ------   ")
         if use_assert:
-            raise AssertionError('{} not equal to {}'.format(a, b))
-    
-    if (type(data1) != type(data2)) or (hasattr(data1, '__len__') and (len(data1) != len(data2))):
+            raise AssertionError("{} not equal to {}".format(a, b))
+
+    if (type(data1) != type(data2)) or (
+        hasattr(data1, "__len__") and (len(data1) != len(data2))
+    ):
         not_equal(data1, data2)
     elif isinstance(data1, list):
         for i in range(len(data1)):
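
Usage sketch for the save/load pair above (folder and file names are illustrative;
note that load_dict expects the full file name, since save_dict appends the
extension itself):

    import numpy as np
    import datascout

    data = {"device1": {"value": np.arange(4.0),
                        "header": {"acqStamp": np.int64(1)},
                        "exception": ""}}
    datascout.save_dict(data, folderPath="./scratch", filename="shot1")
    back = datascout.load_dict("./scratch/shot1.parquet")
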
@@ -429,5 +495,3 @@ def _compare_data(data1, data2, use_assert = False):
                 _compare_data(data1.flatten()[i], data2.flatten()[i], use_assert)
         else:
             not_equal(data1, data2)
-
-
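
The debugging helpers can be driven directly (a sketch; _find_lists only prints,
and _compare_data raises only when called with use_assert=True):

    import numpy as np
    from datascout import _datascout

    _datascout._find_lists({"ok": np.arange(3), "bad": [1, 2]})  # "bad is a list!"
    _datascout._compare_data({"x": np.int8(1)}, {"x": np.int16(1)})  # type mismatch
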
diff --git a/datascout/tests/test_dataconversion.py b/datascout/tests/test_dataconversion.py
index d9f62a7..9d34696 100644
--- a/datascout/tests/test_dataconversion.py
+++ b/datascout/tests/test_dataconversion.py
@@ -14,56 +14,165 @@ import datetime
 import copy
 import os
 
+
 def generate_data_dict():
-    '''
+    """
     Simply generate a dictionary with some values that should be compatible with this package.
-    Note: only 'device1' contains random data... 
-    '''
-    return {'device1': {'value': {  'property1':np.int8(np.random.rand(1)[0]*10**2),
-                                    'property2':np.int8(np.random.rand(1,43)*10**2),
-                                    'property3':np.int8(np.random.rand(10,3)*10**2),
-                                    'property4':np.int8(np.random.rand(50,1)*10**2)},
-                       'header': {'acqStamp':np.int64(np.random.rand(1)[0]*10**12),'cycleStamp':np.int64(np.random.rand(1)[0]*10**12)},
-                       'exception': ''},
-            'device2': {'value': np.array([[1, 12], [4, 5], [1, 2]], dtype=np.int16), 
-                       'header': {'acqStamp':np.int64(44444),'cycleStamp':np.int64(3455445)},
-                       'exception': ''},
-            'device3': {'value': '',
-                       'header': {'acqStamp':np.int64(44444),'cycleStamp':np.int64(0)},
-                       'exception': 'Cipolla'},
-            'device4': {'value': { 'property5':'This is string',
-                                   'property6':np.array(['my', 'list'], dtype=str), #np.str_ or object? -> np.str_!
-                                   'property7':np.array([['my'], ['list'],['long']], dtype=str),
-                                   'property8':np.array([['my', 'list'], ['of', 'more'], ['val', 'string']], dtype=str),
-                                   },
-                       'header': {'acqStamp':np.int64(55555),'cycleStamp':np.int64(3455445)},
-                       'exception': ''},
-            'device5': {'value': { 'property9':{'JAPC_FUNCTION': {'X': np.array([1, 2, 3, 4], dtype=np.float64), 'Y':np.array([14, 2, 7, 5], dtype=np.float64)}},
-                                   'property6':np.array([{'JAPC_FUNCTION': {'X': np.array([1, 2], dtype=np.float64), 'Y':np.array([14, 2], dtype=np.float64)}}, {'JAPC_FUNCTION':{'X': np.array([3, 4], dtype=np.float64), 'Y':np.array([7, 5], dtype=np.float64)}}], dtype=object),
-                                   },
-                       'header': {'acqStamp':np.int64(4444444),'cycleStamp':np.int64(0)},
-                       'exception': ''},
-            'device6': {'value': {'JAPC_FUNCTION': {'X': np.array([1, 2, 3, 4], dtype=np.float64), 'Y':np.array([14, 2, 7, 5], dtype=np.float64)}},
-                       'header': {'acqStamp':np.int64(4455444),'cycleStamp':np.int64(0)},
-                       'exception': ''},
-            'device7': {'value': { 'property10':{'JAPC_ENUM':{'code':np.int64(2), 'string':'piero'}},
-                                   'property11':np.array([{'JAPC_ENUM':{'code':np.int64(3), 'string':'carlo'}}, {'JAPC_ENUM':{'code':np.int64(4), 'string':'micio'}}], dtype=object),
-                                   'property12':{'JAPC_ENUM_SET':{'codes':np.array([2, 8], dtype=np.int64), 'aslong':np.int64(123), 'strings':np.array(['nieva','po'], dtype=str)}}, #np.str_
-                                   'property13':np.array([{'JAPC_ENUM_SET':{'codes':np.array([7,44], dtype=np.int64), 'aslong':np.int64(123), 'strings':np.array(['nieva','po'], dtype=str)}},
-                                                {'JAPC_ENUM_SET':{'codes':np.array([5,6], dtype=np.int64), 'aslong':np.int64(77), 'strings':np.array(['nettuno','plutone'], dtype=str)}}
-                                                ], dtype=object),
-                                   'property14':np.array([{'JAPC_ENUM_SET':{'codes':np.array([], dtype=np.int64), 'aslong':np.int64(0), 'strings':np.array([], dtype=str)}},
-                                                {'JAPC_ENUM_SET':{'codes':np.array([5,6], dtype=np.int64), 'aslong':np.int64(77), 'strings':np.array(['nettuno','plutone'], dtype=str)}}
-                                                ], dtype=object)},
-                       'header': {'acqStamp':np.int64(44333444),'cycleStamp':np.int64(0)},
-                       'exception': ''},
-            'device8': {'value': {'JAPC_ENUM_SET':{'codes':np.array([2, 8], dtype=np.int64), 'aslong':np.int64(123), 'strings':np.array(['nieva','po'], dtype=str)}},
-                       'header': {'acqStamp':np.int64(4),'cycleStamp':np.int64(0)},
-                       'exception': 'no data for xxxx'},
-            'device9': {'value': {'cipolla' : np.array([], dtype=str) },
-                       'header': {'acqStamp':np.int64(4),'cycleStamp':np.int64(0)},
-                       'exception': 'no data for xxxx'}}
-
+    Note: only 'device1' contains random data...
+    """
+    return {
+        "device1": {
+            "value": {
+                "property1": np.int8(np.random.rand(1)[0] * 10 ** 2),
+                "property2": np.int8(np.random.rand(1, 43) * 10 ** 2),
+                "property3": np.int8(np.random.rand(10, 3) * 10 ** 2),
+                "property4": np.int8(np.random.rand(50, 1) * 10 ** 2),
+            },
+            "header": {
+                "acqStamp": np.int64(np.random.rand(1)[0] * 10 ** 12),
+                "cycleStamp": np.int64(np.random.rand(1)[0] * 10 ** 12),
+            },
+            "exception": "",
+        },
+        "device2": {
+            "value": np.array([[1, 12], [4, 5], [1, 2]], dtype=np.int16),
+            "header": {"acqStamp": np.int64(44444), "cycleStamp": np.int64(3455445)},
+            "exception": "",
+        },
+        "device3": {
+            "value": "",
+            "header": {"acqStamp": np.int64(44444), "cycleStamp": np.int64(0)},
+            "exception": "Cipolla",
+        },
+        "device4": {
+            "value": {
+                "property5": "This is string",
+                "property6": np.array(
+                    ["my", "list"], dtype=str
+                ),  # np.str_ or object? -> np.str_!
+                "property7": np.array([["my"], ["list"], ["long"]], dtype=str),
+                "property8": np.array(
+                    [["my", "list"], ["of", "more"], ["val", "string"]], dtype=str
+                ),
+            },
+            "header": {"acqStamp": np.int64(55555), "cycleStamp": np.int64(3455445)},
+            "exception": "",
+        },
+        "device5": {
+            "value": {
+                "property9": {
+                    "JAPC_FUNCTION": {
+                        "X": np.array([1, 2, 3, 4], dtype=np.float64),
+                        "Y": np.array([14, 2, 7, 5], dtype=np.float64),
+                    }
+                },
+                "property6": np.array(
+                    [
+                        {
+                            "JAPC_FUNCTION": {
+                                "X": np.array([1, 2], dtype=np.float64),
+                                "Y": np.array([14, 2], dtype=np.float64),
+                            }
+                        },
+                        {
+                            "JAPC_FUNCTION": {
+                                "X": np.array([3, 4], dtype=np.float64),
+                                "Y": np.array([7, 5], dtype=np.float64),
+                            }
+                        },
+                    ],
+                    dtype=object,
+                ),
+            },
+            "header": {"acqStamp": np.int64(4444444), "cycleStamp": np.int64(0)},
+            "exception": "",
+        },
+        "device6": {
+            "value": {
+                "JAPC_FUNCTION": {
+                    "X": np.array([1, 2, 3, 4], dtype=np.float64),
+                    "Y": np.array([14, 2, 7, 5], dtype=np.float64),
+                }
+            },
+            "header": {"acqStamp": np.int64(4455444), "cycleStamp": np.int64(0)},
+            "exception": "",
+        },
+        "device7": {
+            "value": {
+                "property10": {"JAPC_ENUM": {"code": np.int64(2), "string": "piero"}},
+                "property11": np.array(
+                    [
+                        {"JAPC_ENUM": {"code": np.int64(3), "string": "carlo"}},
+                        {"JAPC_ENUM": {"code": np.int64(4), "string": "micio"}},
+                    ],
+                    dtype=object,
+                ),
+                "property12": {
+                    "JAPC_ENUM_SET": {
+                        "codes": np.array([2, 8], dtype=np.int64),
+                        "aslong": np.int64(123),
+                        "strings": np.array(["nieva", "po"], dtype=str),
+                    }
+                },  # np.str_
+                "property13": np.array(
+                    [
+                        {
+                            "JAPC_ENUM_SET": {
+                                "codes": np.array([7, 44], dtype=np.int64),
+                                "aslong": np.int64(123),
+                                "strings": np.array(["nieva", "po"], dtype=str),
+                            }
+                        },
+                        {
+                            "JAPC_ENUM_SET": {
+                                "codes": np.array([5, 6], dtype=np.int64),
+                                "aslong": np.int64(77),
+                                "strings": np.array(["nettuno", "plutone"], dtype=str),
+                            }
+                        },
+                    ],
+                    dtype=object,
+                ),
+                "property14": np.array(
+                    [
+                        {
+                            "JAPC_ENUM_SET": {
+                                "codes": np.array([], dtype=np.int64),
+                                "aslong": np.int64(0),
+                                "strings": np.array([], dtype=str),
+                            }
+                        },
+                        {
+                            "JAPC_ENUM_SET": {
+                                "codes": np.array([5, 6], dtype=np.int64),
+                                "aslong": np.int64(77),
+                                "strings": np.array(["nettuno", "plutone"], dtype=str),
+                            }
+                        },
+                    ],
+                    dtype=object,
+                ),
+            },
+            "header": {"acqStamp": np.int64(44333444), "cycleStamp": np.int64(0)},
+            "exception": "",
+        },
+        "device8": {
+            "value": {
+                "JAPC_ENUM_SET": {
+                    "codes": np.array([2, 8], dtype=np.int64),
+                    "aslong": np.int64(123),
+                    "strings": np.array(["nieva", "po"], dtype=str),
+                }
+            },
+            "header": {"acqStamp": np.int64(4), "cycleStamp": np.int64(0)},
+            "exception": "no data for xxxx",
+        },
+        "device9": {
+            "value": {"cipolla": np.array([], dtype=str)},
+            "header": {"acqStamp": np.int64(4), "cycleStamp": np.int64(0)},
+            "exception": "no data for xxxx",
+        },
+    }
 
 
 def generic_test_data_conversion(my_data_dict):
@@ -72,26 +181,30 @@ def generic_test_data_conversion(my_data_dict):
 
     # go to pandas and back without altering initial data
     my_pandas = datascout.dict_to_pandas(my_data_dict)
-    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert=True)
     my_data_back = datascout.pandas_to_dict(my_pandas)
-    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert=True)
 
     # go to pyarrow and back without altering initial data
     my_pyarrow = datascout.dict_to_pyarrow(my_data_dict)
-    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert=True)
     my_data_back = datascout.pyarrow_to_dict(my_pyarrow)
-    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert=True)
 
     # go to awkward and back without altering initial data
     my_ak = datascout.dict_to_awkward(my_data_dict)
-    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert=True)
     my_data_back = datascout.awkward_to_dict(my_ak)
-    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert=True)
 
     # a long chain
-    my_data_back = datascout.awkward_to_dict(datascout.pandas_to_awkward(datascout.pyarrow_to_pandas(datascout.dict_to_pyarrow(my_data_dict))))
-    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert = True)
-    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert = True)
+    my_data_back = datascout.awkward_to_dict(
+        datascout.pandas_to_awkward(
+            datascout.pyarrow_to_pandas(datascout.dict_to_pyarrow(my_data_dict))
+        )
+    )
+    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert=True)
+    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert=True)
 
 
 def generic_test_save_load(tmpdir, my_data_dict):
@@ -99,20 +212,20 @@ def generic_test_save_load(tmpdir, my_data_dict):
     my_data_dict_ref = copy.deepcopy(my_data_dict)
 
     # define temporary filename
-    temp_filename_parquet = os.path.join(str(tmpdir), 'test.parquet')
-    temp_filename_pickle = os.path.join(str(tmpdir), 'test.pkl')
+    temp_filename_parquet = os.path.join(str(tmpdir), "test.parquet")
+    temp_filename_pickle = os.path.join(str(tmpdir), "test.pkl")
 
     # go to parquet
     datascout.dict_to_parquet(my_data_dict, temp_filename_parquet)
-    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert=True)
     my_data_back = datascout.parquet_to_dict(temp_filename_parquet)
-    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert=True)
 
     # go to pickle
     datascout.dict_to_pickle(my_data_dict, temp_filename_pickle)
-    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_dict, my_data_dict_ref, use_assert=True)
     my_data_back = datascout.pickle_to_dict(temp_filename_pickle)
-    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert = True)
+    datascout._compare_data(my_data_back, my_data_dict_ref, use_assert=True)
 
 
 def test_data_conversion():
@@ -128,12 +241,14 @@ def test_data_array_conversion():
     # test it
     generic_test_data_conversion(my_data_dict)
 
+
 def test_save_load(tmpdir):
     # generate dataset
     my_data_dict = generate_data_dict()
     # test it
     generic_test_save_load(tmpdir, my_data_dict)
 
+
 def test_save_load_array(tmpdir):
     # generate dataset
     my_data_dict = [generate_data_dict(), generate_data_dict(), generate_data_dict()]
@@ -142,9 +257,9 @@ def test_save_load_array(tmpdir):
 
 
 # Function above can be locally tested as:
-'''
+"""
 from pathlib import Path
 tmpdir=Path('.')
 test_save_load(tmpdir)
 test_save_load_array(tmpdir)
-'''
\ No newline at end of file
+"""
diff --git a/examples/000_example.py b/examples/000_example.py
index 4fb793b..1fee7df 100644
--- a/examples/000_example.py
+++ b/examples/000_example.py
@@ -4,42 +4,62 @@ from datetime import timezone
 import datetime
 import numpy as np
 
+
 def convert_2d_array(my_array):
-    if len(np.shape(my_array))==1: return my_array
-    list=[]
+    if len(np.shape(my_array)) == 1:
+        return my_array
+    rows = []
     for jj in range(np.shape(my_array)[0]):
-        list.append(my_array[jj,:])
-    return list    
+        rows.append(my_array[jj, :])
+    return rows
 
 
 def old2new(my_dict, verbose=False):
     for device_name, device_value in my_dict.items():
-        if verbose: print(device_name)
-        device_value['header']['acqStamp'] = \
-            np.int64(device_value['header']['acqStamp'].replace(tzinfo=timezone.utc).timestamp()*1e9)
-        device_value['header']['cycleStamp'] = \
-            np.int64(device_value['header']['cycleStamp'].replace(tzinfo=timezone.utc).timestamp()*1e6)*1e3
-        device_value['header']['setStamp'] = \
-            np.int64(device_value['header']['setStamp'].replace(tzinfo=timezone.utc).timestamp()*1e9)
-        if (not type(device_value['value'])==dict) and (not type(device_value['value'])==list):
-            if math.isnan(device_value['value']):
-                device_value['value']= 'no data'
-            if type(device_value['value'])==np.ndarray:
-                device_value['value']= convert_2d_array(device_value['value'])
-        if type(device_value['value'])==dict:
-            for value_name, value_value in device_value['value'].items():
-                if verbose: print(device_value['value'][value_name])
-                if type(device_value['value'][value_name])==np.ndarray:
-                    device_value['value'][value_name]=convert_2d_array(value_value)
+        if verbose:
+            print(device_name)
+        device_value["header"]["acqStamp"] = np.int64(
+            device_value["header"]["acqStamp"].replace(tzinfo=timezone.utc).timestamp()
+            * 1e9
+        )
+        device_value["header"]["cycleStamp"] = (
+            np.int64(
+                device_value["header"]["cycleStamp"]
+                .replace(tzinfo=timezone.utc)
+                .timestamp()
+                * 1e6
+            )
+            * 1e3
+        )
+        device_value["header"]["setStamp"] = np.int64(
+            device_value["header"]["setStamp"].replace(tzinfo=timezone.utc).timestamp()
+            * 1e9
+        )
+        if (not type(device_value["value"]) == dict) and (
+            not type(device_value["value"]) == list
+        ):
+            if math.isnan(device_value["value"]):
+                device_value["value"] = "no data"
+            if type(device_value["value"]) == np.ndarray:
+                device_value["value"] = convert_2d_array(device_value["value"])
+        if type(device_value["value"]) == dict:
+            for value_name, value_value in device_value["value"].items():
+                if verbose:
+                    print(device_value["value"][value_name])
+                if type(device_value["value"][value_name]) == np.ndarray:
+                    device_value["value"][value_name] = convert_2d_array(value_value)
     return my_dict
 
-#my_parquet = '/eos/project/l/liu/datascout/parquet_file/2021.04.30.01.54.47.454151.parquet'
-my_dict_file = '/eos/project/l/liu/datascout/pickle_files/2021.04.30.01.54.47.454151.pkl'
 
-#ds.parquet_to_dict(my_parquet)
+# my_parquet = '/eos/project/l/liu/datascout/parquet_file/2021.04.30.01.54.47.454151.parquet'
+my_dict_file = (
+    "/eos/project/l/liu/datascout/pickle_files/2021.04.30.01.54.47.454151.pkl"
+)
+
+# ds.parquet_to_dict(my_parquet)
 my_dict = ds.pickle_to_dict(my_dict_file)
 
-a=old2new(my_dict[0])
+a = old2new(my_dict[0])
 
 # ONLINE
 ds.dict_to_awkward(a)
@@ -47,21 +67,16 @@ ds.dict_to_pyarrow(a)
 ds.dict_to_pandas(a)
 
 # OFFLINE
-ds.dict_to_parquet(a, 'test') 
-ds.dict_to_json(a, 'test')
-ds.dict_to_pickle(a, 'test')
-
-# 
-print('parquet_to_pyarrow')
-ds.parquet_to_pyarrow('test.parquet')
-
-print('parquet_to_pandas')
-ds.parquet_to_pandas('test.parquet')
-
-print('parquet_to_awkward')
-ds.parquet_to_awkward('test.parquet')
-
-
+ds.dict_to_parquet(a, "test")
+ds.dict_to_json(a, "test")
+ds.dict_to_pickle(a, "test")
 
+#
+print("parquet_to_pyarrow")
+ds.parquet_to_pyarrow("test.parquet")
 
+print("parquet_to_pandas")
+ds.parquet_to_pandas("test.parquet")
 
+print("parquet_to_awkward")
+ds.parquet_to_awkward("test.parquet")
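
The timestamp handling in old2new boils down to this arithmetic (an isolated
sketch; the example value is illustrative):

    from datetime import datetime, timezone
    import numpy as np

    stamp = datetime(2021, 4, 30, 1, 54, 47)
    acq_ns = np.int64(stamp.replace(tzinfo=timezone.utc).timestamp() * 1e9)
    cycle_ns = np.int64(stamp.replace(tzinfo=timezone.utc).timestamp() * 1e6) * 1e3
    # note: the trailing * 1e3 is a float multiply, so cycle_ns ends up float64,
    # exactly as in the function above.
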
diff --git a/setup.py b/setup.py
index a3e9b85..c847b9b 100644
--- a/setup.py
+++ b/setup.py
@@ -10,59 +10,51 @@ from setuptools import setup, find_packages
 
 
 HERE = Path(__file__).parent.absolute()
-with (HERE / 'README.md').open('rt') as fh:
+with (HERE / "README.md").open("rt") as fh:
     LONG_DESCRIPTION = fh.read().strip()
 
 
 REQUIREMENTS: dict = {
-    'core': [
-        'numpy',
-        'pandas',
-        'pyarrow',
-        'awkward',
-        'datetime',
-        'pathlib'
+    "core": ["numpy", "pandas", "pyarrow", "awkward", "datetime", "pathlib"],
+    "test": [
+        "pytest",
     ],
-    'test': [
-        'pytest',
-    ],
-    'dev': [
+    "dev": [
         # 'requirement-for-development-purposes-only',
     ],
-    'doc': [
-        'sphinx',
-        'acc-py-sphinx',
+    "doc": [
+        "sphinx",
+        "acc-py-sphinx",
     ],
 }
 
 
 setup(
-    name='datascout',
+    name="datascout",
     version="0.0.1.dev0",
-
-    author='Davide Gamba',
-    author_email='davide.gamba@cern.ch',
-    description='SHORT DESCRIPTION OF PROJECT',
+    author="Davide Gamba",
+    author_email="davide.gamba@cern.ch",
+    description="SHORT DESCRIPTION OF PROJECT",
     long_description=LONG_DESCRIPTION,
-    long_description_content_type='text/markdown',
-    url='',
-
+    long_description_content_type="text/markdown",
+    url="",
     packages=find_packages(),
-    python_requires='~=3.7',
+    python_requires="~=3.7",
     classifiers=[
         "Programming Language :: Python :: 3",
         "Operating System :: OS Independent",
     ],
-
-    install_requires=REQUIREMENTS['core'],
+    install_requires=REQUIREMENTS["core"],
     extras_require={
         **REQUIREMENTS,
         # The 'dev' extra is the union of 'test' and 'doc', with an option
         # to have explicit development dependencies listed.
-        'dev': [req
-                for extra in ['dev', 'test', 'doc']
-                for req in REQUIREMENTS.get(extra, [])],
+        "dev": [
+            req
+            for extra in ["dev", "test", "doc"]
+            for req in REQUIREMENTS.get(extra, [])
+        ],
         # The 'all' extra is the union of all requirements.
-        'all': [req for reqs in REQUIREMENTS.values() for req in reqs],
+        "all": [req for reqs in REQUIREMENTS.values() for req in reqs],
     },
 )
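
For reference, the 'dev' and 'all' extras are pure derivations of REQUIREMENTS;
with the dict above they expand roughly to (a sketch):

    REQUIREMENTS = {"core": ["numpy", "pandas", "pyarrow", "awkward"],
                    "test": ["pytest"], "dev": [], "doc": ["sphinx", "acc-py-sphinx"]}
    dev = [r for extra in ["dev", "test", "doc"] for r in REQUIREMENTS.get(extra, [])]
    # dev == ["pytest", "sphinx", "acc-py-sphinx"]
    all_reqs = [r for reqs in REQUIREMENTS.values() for r in reqs]
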
-- 
GitLab