Skip to content
Snippets Groups Projects
Commit 2394ce45 authored by Guido Sterbini's avatar Guido Sterbini
Browse files

Adding a simple example

parent 6c08b2d2
No related branches found
No related tags found
No related merge requests found
Pipeline #2593253 failed
# datascout
Simple package to handle data saving and reading with minimum required libraries.
Mainly used as a dependency of pyjapcscout, but it can be used for other purposes as well.
Mainly used as a dependency of `pyjapcscout`, but it can be used for other purposes as well.
## Purpose of this project
The idea is to provide a few sweet functions to go from a nested dict of numpy arrays to parquet (and to pickle, and json) and come back.
The idea is to provide a few sweet functions to go from a nested `dict` of `numpy` arrays to `parquet` (and to `pickle`, and `json`) and come back **preserving** the data types. Preserving the data types is important for the round-trip of reading, saving and setting machine parameters.
## Getting started
## How to develop it:
I set up this package as:
......
import datascout as ds
import math
from datetime import timezone
import datetime
import numpy as np
def convert_2d_array(my_array):
    """Split an array with two or more dimensions into a list of slices.

    Parameters
    ----------
    my_array : array-like
        The data to convert.

    Returns
    -------
    object
        ``my_array`` itself, untouched, when it has fewer than two
        dimensions (scalars, 0-d and 1-d arrays). Otherwise a ``list`` with
        one entry per index along the first axis, each entry being
        ``my_array[i]`` (for a 2-D array: one 1-D array per row).
    """
    # Scalars and 0-d arrays have no first axis to split on, so they are
    # returned as-is just like 1-d input instead of raising IndexError.
    if np.ndim(my_array) < 2:
        return my_array
    # Iterating an array walks its first axis; asanyarray leaves ndarray
    # (sub)classes untouched and lets nested sequences be split as well.
    return list(np.asanyarray(my_array))
def _stamp_to_ns(stamp):
    """Return a datetime as integer nanoseconds since the Unix epoch (np.int64).

    The wall-clock fields of ``stamp`` are taken as UTC: its ``tzinfo`` is
    overwritten, not converted. Integer arithmetic is used because a float
    ``timestamp() * 1e9`` cannot hold present-day nanosecond values exactly.
    """
    epoch = datetime.datetime(1970, 1, 1, tzinfo=timezone.utc)
    elapsed = stamp.replace(tzinfo=timezone.utc) - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    microseconds = whole_seconds * 1000000 + elapsed.microseconds
    return np.int64(microseconds * 1000)


def _is_nan(value):
    """Return True only when ``value`` is a NaN scalar (or 0-d array).

    Containers, strings, None and arrays with at least one dimension are
    never reported as NaN, so they cannot make ``math.isnan`` raise.
    """
    if isinstance(value, np.ndarray) and value.ndim > 0:
        return False
    try:
        return bool(math.isnan(value))
    except (TypeError, ValueError, OverflowError):
        return False


def old2new(my_dict, verbose=False):
    """Convert, in place, a dict of device readings to the new layout.

    For every device entry:

    * ``header['acqStamp']``, ``header['cycleStamp']`` and
      ``header['setStamp']`` (datetime objects, read as UTC) are replaced by
      ``np.int64`` nanoseconds since the Unix epoch;
    * a NaN ``value`` is replaced by the string ``'no data'``;
    * an ndarray ``value`` is passed through ``convert_2d_array``;
    * for a dict ``value``, each ndarray item is passed through
      ``convert_2d_array``.

    Parameters
    ----------
    my_dict : dict
        Maps a device name to a dict holding ``'header'`` and ``'value'``.
    verbose : bool, optional
        When True, print each device name and each item of dict values.

    Returns
    -------
    dict
        The same ``my_dict`` object, modified in place.
    """
    for device_name, device_value in my_dict.items():
        if verbose:
            print(device_name)

        header = device_value['header']
        # All three stamps share one exact conversion so they end up with the
        # same type (np.int64) and the same unit (ns).
        for stamp_name in ('acqStamp', 'cycleStamp', 'setStamp'):
            header[stamp_name] = _stamp_to_ns(header[stamp_name])

        value = device_value['value']
        if _is_nan(value):
            device_value['value'] = 'no data'
        elif isinstance(value, np.ndarray):
            device_value['value'] = convert_2d_array(value)
        elif isinstance(value, dict):
            # Only existing keys are reassigned, which is safe while iterating.
            for value_name, value_value in value.items():
                if verbose:
                    print(value_value)
                if isinstance(value_value, np.ndarray):
                    value[value_name] = convert_2d_array(value_value)
    return my_dict
# Example driver: load a pickled dict, convert it with old2new, then push the
# result through the datascout converters.
# The parquet path and reader below are kept (commented out) as an
# alternative input source.
#my_parquet = '/eos/project/l/liu/datascout/parquet_file/2021.04.30.01.54.47.454151.parquet'
my_dict_file = '/eos/project/l/liu/datascout/pickle_files/2021.04.30.01.54.47.454151.pkl'
#ds.parquet_to_dict(my_parquet)
my_dict = ds.pickle_to_dict(my_dict_file)
# Only the first element of the loaded object is converted (old2new modifies
# it in place and returns it).
a=old2new(my_dict[0])
# ONLINE: converters that take only the dict; their return values are
# discarded here.
# NOTE(review): presumably these build in-memory objects - confirm in datascout.
ds.dict_to_awkward(a)
ds.dict_to_pyarrow(a)
ds.dict_to_pandas(a)
# OFFLINE: converters that also take the target name 'test'.
# NOTE(review): the readers below open 'test.parquet', so dict_to_parquet
# presumably appends the extension itself - confirm in datascout.
ds.dict_to_parquet(a, 'test')
ds.dict_to_json(a, 'test')
ds.dict_to_pickle(a, 'test')
# Read 'test.parquet' back through each parquet reader, printing the reader
# name before each call.
print('parquet_to_pyarrow')
ds.parquet_to_pyarrow('test.parquet')
print('parquet_to_pandas')
ds.parquet_to_pandas('test.parquet')
print('parquet_to_awkward')
ds.parquet_to_awkward('test.parquet')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment