diff --git a/examples/001_example/001.py b/examples/001_example/001.py deleted file mode 100644 index e18ddc0f3eed5c0070b01540f7480bffd6c69667..0000000000000000000000000000000000000000 --- a/examples/001_example/001.py +++ /dev/null @@ -1,294 +0,0 @@ -# %% -""" -### Introduction - -See https://codimd.web.cern.ch/p/0QX9ebi1bn#/ for the latest version. - -Our community is often confronted with the need of running complex algorithms for a set of different input. -E.g. a DA computation with tune scan + beam-beam + errors. - -This implies to stage the algorithm in different steps corresponding, sometimes, to different codes (MADX, SixTrack,...) and/or different hardware (local CPU, GPU, HTCondor/LSF clusters, BOINC...). - -The topic of this brainstorming is to discuss about a python package that could convey a **standard** approach in order to - -- avoid re-inventing the wheel each time, -- improve the way we share our work-flow for the different simulations, -- provide a standard way to babysitting the simulations and postprocess the output. - -Clearly the package can be integrated with other solutions (see next [presentation]()). - -The challenge here is to maintain a good balance between simplicity (to be user-friendly) and flexibility (to cover a large gamut of use cases). - -You can find at https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker a proposal. -We are going first to present its rationale (a bit abstract, 5 min) and then explore together a simple example (pragmatic and complementary to the first part, 15 min). - - -### Rationale - -The general way to describe our problem (running a staged algorithm for a set of different input) is to associate a **job** for each stage and input. - -A job can be represented as a **node** in a **graph** (nodes connected with edges). - -The main idea is to downscale the problem of a generic graph to a simpler graph, a **tree**. - -A **tree** is a simplified [**DAG**](https://en.wikipedia.org/wiki/Directed_acyclic_graph) (Directed Acycled Graphs) where each node can have maximum one parent. -The tree is convenient since it can be directly mapped into a file system (the folder stucture of a file system is a tree). - -In python a tree can be represented, for example, with the `anytree` package (see [000_example](https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker/-/blob/master/examples/000_example/000.ipynb)). - -The `anynode` object of the `anytree` package can be generalized to any class. -Indeed we generalized it to our `NodeJob` class, inheriting all the methods/attributes of `anynode`, e.g., root, parent, children, ancestors, siblings, leaves, depth, height, searching/filtering methods... - -The main ideas is that each node of our simulation tree - -1. is a instance of the `NodeJob` (extending the `anytree`). -2. refers to a **template node** (example a MadX mask): `NodeJob.template_path` -3. has a specific dictionary of input, `NodeJob.dictionary` -4. is mapped to a file system, `NodeJob.path` -5. has a specific submit command, `NodeJob.submit_command` -6. has a specific log file, `NodeJob.log_path` - - -The users should spend 99% of their time on the physics (the templates, each template is well "isolated" for a deep understanding of its physics), and use the package to build/orchestrate the tree. - -#### Building of the tree -The building of the tree is done in three steps: -- istantiating the nodes -- **cloning** (i.e. copying) the templates on the NodeJob.path -- **mutating** (i.e. changing) the input of the template with the info in the NodeJob.dictionary - - -#### Orchestrating the tree - -Each node can be run (refers to NodeJob.submit_command) and logged (NodeJob.submit_command). -One can orchestrate the simulation but writing and reading in the different log. - -We will show now a simple example to clarify all these ingredients. -In this way we can factorize the physics (the template), the parameters (the dictionary), the folder (JobNode.path) but maintaining for all nodes the very same interface (`JobNode`). - - - - -### Simple example ([001_example](https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker/-/blob/master/examples/001_example/001.ipynb)) - - -Let aussume that we need to make this computation - -$\sqrt{|(a+b)\times c|}$ - -and we want to compute the standard deviation of the result assuming that a, b and c are normal distributed independent variables. Clearly the problem is quite naive but we want to address it as if we will need a cluster to solve it. - -For example, we can partition the problem in three conscutive stages - -1. A sum: $(a+b)$ -2. A multiplication of the result 1 with c: $(a+b)\times c$ -3. A sqrt of the result of 2: $\sqrt{|(a+b)\times c|}$ - -For each stage we build a template. -Documentation (only started, you need to be on GPN) can be found at https://acc-py.web.cern.ch/gitlab/abpcomputing/sandbox/tree_maker/docs/master/. -""" - -# %% -import tree_maker -from tree_maker import NodeJob - -# %% -# Clearly for this easy task on can do all in the very same python kernel -# BUT here we want to mimic the typical flow -# 1. MADX for optics matching/error seeding -# 2. Tracking for FMA and or DA studies -# 3. simulation baby-sitting and -# 4. postprocessing - -import numpy as np -a=np.random.randn(4) -b=np.random.randn(4) -c=np.random.randn(2) - -my_list_original=[] -for ii in c: - my_list_original+=list(np.sqrt(np.abs((a+b)*ii))) -my_list_original=sorted(my_list_original) - -# %% -""" -#### The root of the tree -""" - -# %% -#root -root = NodeJob(name='root', parent=None) -root.path = '/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000' -root.template_path = root.path + '/../templates' -root.log_file = root.path + "/log.yaml" - -# %% -""" -#### First generation of nodes -""" - -# %% -#first generation -for node in root.root.generation(0): - node.children=[NodeJob(name=f"{child:03}", - parent=node, - path=f"{node.path}/{child:03}", - template_path = root.template_path+'/sum_it', - submit_command = f'python run.py', - log_file=f"{node.path}/{child:03}/log.yaml", - dictionary={'a':float(a[child]), - 'b':float(b[child]) - }) - for child in range(len(a))] - -# To combine different lists one can use the product or the zip functions -#import itertools -#[[i, j, z] for i, j, z in itertools.product(['a','b'],['c','d'],[1,2,3])] -#[[i, j, z] for i, j, z in zip(['a','b'],['c','d'],[1,2,3])] -root.print_it() - -# %% -""" -#### Second generation of nodes -""" - -# %% -#second generation -for node in root.root.generation(1): - node.children=[NodeJob(name=f"{child:03}", - parent=node, - path = f"{node.path}/{child:03}", - template_path = root.template_path+'/multiply_it', - submit_command = f'python run.py', - log_file=f"{node.path}/{child:03}/log.yaml", - dictionary={'c': float(c[child])}) - for child in range(len(c))] -root.print_it() - -# %% -""" -#### Third generation of nodes -""" - -# %% -#third generation -for node in root.root.generation(2): - node.children=[NodeJob(name=f"{child:03}", - parent=node, - path = f"{node.path}/{child:03}", - template_path = root.template_path+'/square_root_it', - submit_command = f'python run.py', - log_file=f"{node.path}/{child:03}/log.yaml", - dictionary={'log_file': f"{node.path}/{child:03}/log.yaml"}) - for child in range(1)] -root.print_it() - -# %% -# we can inspect the data structure -root.children[3].children[1].children[0].submit_command - -# %% -# or we can modify the attributes of the tree -if False: - for i, node in enumerate(root.leaves): - if i>3: - print(i) - node.submit_command = f'condor_submit run.sub -batch-name square_root' - -# %% -# we can transfer the information of the tree in a yaml for the orchestration later -root.to_yaml() - -# %% -""" -### Cloning the templates of the nodes -From python objects we move the nodes to the file-system. -""" - -# %% -# We map the pythonic tree in a >folder< tree -root.clean_log() -root.rm_children_folders() -for depth in range(root.height): - [x.clone_children() for x in root.generation(depth)] - -# VERY IMPORTANT, tagging -root.tag_as('cloned') - -# %% -""" -### Launching the jobs -""" - -# %% -root.tag_as('launched') -for node in root.generation(1): - node.cleanlog_mutate_submit() - -# %% -for node in root.generation(2): - node.cleanlog_mutate_submit() - -# %% -for node in root.generation(3): - node.cleanlog_mutate_submit() - -# %% -# check if all root descendants are completed -if all([descendant.has_been('completed') for descendant in root.descendants]): - root.tag_as('completed') - print('All jobs are completed!') - -# %% -""" -### Post-processing -""" - -# %% -# retrieve the output -my_list=[] -for node in root.leaves: - output = tree_maker.from_yaml(node.path+'/output.yaml') - my_list.append(output['result']) - -# %% -# sanity check -assert any(np.array(sorted(my_list))-np.array(my_list_original))==0 - -# %% -# std of the results -np.std(my_list) - -# %% -""" -### Monitoring -""" - -# %% -root=tree_maker.tree_from_yaml(f'/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/tree.yaml') - -# %% -# checking the status -my_filter = lambda node: node.depth==2 and node.has_been('completed') -for node in root.descendants: - if my_filter(node): - print(node.path) - -# one can also use root.find(filter_= lambda node: node.depth==1 and node.has_been('completed')) - -# %% -def my_test(node): - output = tree_maker.from_yaml(node.path+'/output.yaml') - return node.is_leaf and node.has_been('completed') and output['result']<1.2 - -for node in root.descendants: - if my_test(node): - print(node.path) - -# %% -#or (better) -for node in root.generation(3): - if my_test(node): - print(node.path) - -# %% diff --git a/examples/001_example/001_chronjob.py b/examples/001_example/001_chronjob.py index 9ec2a712e53088d7b291899140679b53dbc09b1d..954bdf1bc47ec579be87ad08d07e5dc2b251bba7 100644 --- a/examples/001_example/001_chronjob.py +++ b/examples/001_example/001_chronjob.py @@ -24,4 +24,4 @@ else: node.smart_run() if all([descendant.has_been('completed') for descendant in root.descendants]): root.tag_as('completed') - print('All descendants of root are completed!') \ No newline at end of file + print('All descendants of root are completed!') diff --git a/examples/001_example/templates/multiply_it/output.yaml b/examples/001_example/templates/multiply_it/output.yaml deleted file mode 100644 index 0c204052cb219aa0de8efdd821735c6429dc9eac..0000000000000000000000000000000000000000 --- a/examples/001_example/templates/multiply_it/output.yaml +++ /dev/null @@ -1 +0,0 @@ -result: 1 diff --git a/examples/001_example/templates/square_root_it/output.yaml b/examples/001_example/templates/square_root_it/output.yaml deleted file mode 100644 index 832277ebbe912aaa146a7cd5e77e34a1f8e541cf..0000000000000000000000000000000000000000 --- a/examples/001_example/templates/square_root_it/output.yaml +++ /dev/null @@ -1 +0,0 @@ -result: 1.0 diff --git a/examples/001_example/templates/sum_it/config.yaml b/examples/001_example/templates/sum_it/config.yaml index cfaf5e97b89ace9550acb66b9fd6fe5162dc7ff0..a87ce4d7e300fd2ac7fd0b1b673ff95961a14eea 100644 --- a/examples/001_example/templates/sum_it/config.yaml +++ b/examples/001_example/templates/sum_it/config.yaml @@ -1,5 +1,5 @@ # This is my input -a: 0 # this is the first element of the sum +a: -1 # this is the first element of the sum b: -1 # this is the second element of the sum run_command: 'python run.py' -log_file: './log.yaml' \ No newline at end of file +log_file: './log.yaml' diff --git a/examples/001_example/templates/sum_it/output.yaml b/examples/001_example/templates/sum_it/output.yaml index 2b7ccfe6dbad97a75a5ff599c204e96b78c287e6..49f08b507230fa33cd9831b0fa3b1876f5b19d4d 100644 --- a/examples/001_example/templates/sum_it/output.yaml +++ b/examples/001_example/templates/sum_it/output.yaml @@ -1 +1 @@ -result: -1 +result: -2 diff --git a/examples/001_example/templates/sum_it/run.sh b/examples/001_example/templates/sum_it/run.sh index 1e6d52da2a5084ca7d814b65a3929f4b0a501db4..0965ed57585743e40b68aa33396a7548367c55ee 100755 --- a/examples/001_example/templates/sum_it/run.sh +++ b/examples/001_example/templates/sum_it/run.sh @@ -1,3 +1,4 @@ #!/bin/bash -source /afs/cern.ch/eng/tracking-tools/python_installations/activate_default_python +#bsub -q hpc_acc -e %J.err -o %J.out cd $PWD && ./run.sh +source /afs/cern.ch/eng/tracking-tools/python_installations/miniconda3/bin/activate python run.py diff --git a/examples/003_example/001_make_folders.py b/examples/003_example/001_make_folders.py new file mode 100644 index 0000000000000000000000000000000000000000..833bd4757e2136d25f0f5b0ade2760b70b07f37c --- /dev/null +++ b/examples/003_example/001_make_folders.py @@ -0,0 +1,101 @@ +# %% +import tree_maker +from tree_maker import NodeJob +import time + +import numpy as np +a=np.random.randn(20) +b=np.random.randn(20) +c=np.random.randn(10) + +my_list_original=[] +for ii in c: + my_list_original+=list((a+b)*ii) +my_list_original=sorted(my_list_original) + +# %% +""" +#### The root of the tree +""" +start_time = time.time() +# %% +#root +import os +my_folder = os.getcwd() +root = NodeJob(name='root', parent=None) +root.path = my_folder + '/study_000' +root.template_path = my_folder + '/templates' +root.log_file = root.path + "/log.json" + +# %% +""" +#### First generation of nodes +""" + +# %% +#first generation +for node in root.root.generation(0): + node.children=[NodeJob(name=f"{child:03}", + parent=node, + path=f"{node.path}/{child:03}", + template_path = root.template_path+'/sum_it', + submit_command = f'bsub -q hpc_acc -e %J.err -o %J.out {root.template_path}/sum_it/run.sh &', + log_file=f"{node.path}/{child:03}/log.json", + dictionary={'a':float(a[child]), + 'b':float(b[child]) + }) + for child in range(len(a))] + +# %% +""" +To combine different lists one can use the product or the zip functions +``` +import itertools +[[i, j, z] for i, j, z in itertools.product(['a','b'],['c','d'],[1,2,3])] +[[i, j, z] for i, j, z in zip(['a','b'],['c','d'],[1,2,3])] +``` +""" + +# %% +""" +#### Second generation of nodes +""" + +# %% +# second generation +for node in root.root.generation(1): + node.children=[NodeJob(name=f"{child:03}", + parent=node, + path = f"{node.path}/{child:03}", + template_path = f'{root.template_path}/multiply_it', + submit_command = f'bsub -q hpc_acc -e %J.err -o %J.out {root.template_path}/multiply_it/run.sh &', + log_file=f"{node.path}/{child:03}/log.json", + dictionary={'c': float(c[child])}) + for child in range(len(c))] + +root.to_json() + +print('Done with the tree creation.') +print("--- %s seconds ---" % (time.time() - start_time)) + +# %% +""" +### Cloning the templates of the nodes +From python objects we move the nodes to the file-system. +""" + +# %% +# We map the pythonic tree in a >folder< tree +start_time = time.time() +root.clean_log() +root.rm_children_folders() +from joblib import Parallel, delayed + +for depth in range(root.height): +# [x.clone_children() for x in root.generation(depth)] + Parallel(n_jobs=8)(delayed(x.clone_children)() for x in root.generation(depth)) + +# VERY IMPORTANT, tagging +root.tag_as('cloned') +print('The tree structure is moved to the file system.') +print("--- %s seconds ---" % (time.time() - start_time)) diff --git a/examples/003_example/002_chronjob.py b/examples/003_example/002_chronjob.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d7d83e25006766d960c536ca787f58c09b54be --- /dev/null +++ b/examples/003_example/002_chronjob.py @@ -0,0 +1,26 @@ +# %% +""" +Example of a chronjob +""" + +# %% +import tree_maker +from tree_maker import NodeJob + + +# %% +try: + root=tree_maker.tree_from_json( + f'./study_000/tree.json') +except Exception as e: + print(e) + print('Probably you forgot to edit the address of you json file...') + +if root.has_been('completed'): + print('All descendants of root are completed!') +else: + for node in root.descendants: + node.smart_run() + if all([descendant.has_been('completed') for descendant in root.descendants]): + root.tag_as('completed') + print('All descendants of root are completed!') diff --git a/examples/003_example/003_postprocessing.py b/examples/003_example/003_postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..e54c1c62ef38f4df3ee4e70faa9e28c18bedb1ac --- /dev/null +++ b/examples/003_example/003_postprocessing.py @@ -0,0 +1,28 @@ +# %% +""" +Example of a chronjob +""" + +# %% +import tree_maker +from tree_maker import NodeJob +import pandas as pd + +# %% +# Load the tree from a yaml +try: + root=tree_maker.tree_from_json( + f'./study_000/tree.json') +except Exception as e: + print(e) + print('Probably you forgot to edit the address of you json file...') + +my_list=[] +if root.has_been('completed'): + print('All descendants of root are completed!') + for node in root.generation(2): + my_list.append(node.has_been('completed')) + assert all(my_list) + print('Sanity check passed.') +else: + print('Complete first all jobs') diff --git a/examples/003_example/004_postprocessing.py b/examples/003_example/004_postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..514ad428fc53d0deca98388256e82cd9187b297b --- /dev/null +++ b/examples/003_example/004_postprocessing.py @@ -0,0 +1,29 @@ +# %% +""" +Example of a chronjob +""" + +# %% +import tree_maker +from tree_maker import NodeJob +import pandas as pd +import awkward as ak + +# %% +# Load the tree from a yaml +try: + root=tree_maker.tree_from_json( + f'./study_000/tree.json') +except Exception as e: + print(e) + print('Probably you forgot to edit the address of you json file...') + +my_list=[] +if root.has_been('completed'): + print('All descendants of root are completed!') + for node in root.generation(2)[0:100]: + my_list.append(ak.from_parquet(f'{node.path}/test.parquet', columns=['x'], row_groups=99)['x',-1]) + print(my_list) +else: + print('Complete first all jobs') + diff --git a/examples/003_example/005_postprocessing.py b/examples/003_example/005_postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..4121724ae5a0fc874ace02a71e7deb5a20613fcd --- /dev/null +++ b/examples/003_example/005_postprocessing.py @@ -0,0 +1,31 @@ +# %% +""" +Example of a chronjob +""" + +# %% +import tree_maker +from tree_maker import NodeJob +import pandas as pd +import awkward as ak +import os + +# %% +# Load the tree from a yaml +try: + root=tree_maker.tree_from_json( + f'./study_000/tree.json') +except Exception as e: + print(e) + print('Probably you forgot to edit the address of you json file...') + +my_list=[] +if root.has_been('completed'): + print('All descendants of root are completed!') + for node in root.generation(1): + node.tag_as('postprocessing_submitted') + node.submit_command=f'bsub -q hpc_acc {node.template_path}/postprocess.sh &' + node.submit() +else: + print('Complete first all jobs') + diff --git a/examples/003_example/templates/multiply_it/config.yaml b/examples/003_example/templates/multiply_it/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af123f86879a80214cd89734e405627942305f93 --- /dev/null +++ b/examples/003_example/templates/multiply_it/config.yaml @@ -0,0 +1,4 @@ +# This is my input +parent: '../sum_it' # this is the first element of the product +c: -1 # this is the second element of the product +log_file: './log.yaml' \ No newline at end of file diff --git a/examples/003_example/templates/multiply_it/log.yaml b/examples/003_example/templates/multiply_it/log.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c0e2dbbd8b758763cdac69e3ee9d82c47152759 --- /dev/null +++ b/examples/003_example/templates/multiply_it/log.yaml @@ -0,0 +1,52 @@ +{ + "0": { + "tag": "started", + "unix_time": 1624890907618272000, + "human_time": "2021-06-28 16:35:07.618272" + }, + "1": { + "tag": "completed", + "unix_time": 1624890908593553920, + "human_time": "2021-06-28 16:35:08.593554" + }, + "2": { + "tag": "started", + "unix_time": 1624890995812024064, + "human_time": "2021-06-28 16:36:35.812024" + }, + "3": { + "tag": "completed", + "unix_time": 1624890995928683008, + "human_time": "2021-06-28 16:36:35.928683" + }, + "4": { + "tag": "started", + "unix_time": 1624891021181616128, + "human_time": "2021-06-28 16:37:01.181616" + }, + "5": { + "tag": "completed", + "unix_time": 1624891021380608000, + "human_time": "2021-06-28 16:37:01.380608" + }, + "6": { + "tag": "started", + "unix_time": 1624891070778615040, + "human_time": "2021-06-28 16:37:50.778615" + }, + "7": { + "tag": "completed", + "unix_time": 1624891070982253056, + "human_time": "2021-06-28 16:37:50.982253" + }, + "8": { + "tag": "started", + "unix_time": 1624891074472503808, + "human_time": "2021-06-28 16:37:54.472504" + }, + "9": { + "tag": "completed", + "unix_time": 1624891074613457920, + "human_time": "2021-06-28 16:37:54.613458" + } +} \ No newline at end of file diff --git a/examples/003_example/templates/multiply_it/run.py b/examples/003_example/templates/multiply_it/run.py new file mode 100644 index 0000000000000000000000000000000000000000..4e99e1c0a515f596180e022199da549372ab68c1 --- /dev/null +++ b/examples/003_example/templates/multiply_it/run.py @@ -0,0 +1,33 @@ +import json +import numpy as np +import ruamel.yaml +import tree_maker + +# load the configuration +with open('config.yaml', 'r') as file: + yaml = ruamel.yaml.YAML() + cfg = yaml.load(file) + +with open(cfg['parent']+'/output.yaml', 'r') as file: + yaml = ruamel.yaml.YAML() + parent_out = yaml.load(file) + +tree_maker.tag_json.tag_it(cfg['log_file'], 'started') + +# define the function (product of two numbers) +def my_function(my_x, my_y): + 'Just a multiplication' + return my_x*my_y + +# run the code +result = my_function(parent_out['result'], cfg['c']) + +with open('output.yaml', 'w') as fp: + yaml = ruamel.yaml.YAML() + yaml.dump({'result': result}, fp) + +import pandas as pd + +pd.DataFrame(np.random.randn(100000,6), columns=['x','xp','y','yp','z','zp']).to_parquet('test.parquet', row_group_size=1000) + +tree_maker.tag_json.tag_it(cfg['log_file'], 'completed') diff --git a/examples/003_example/templates/multiply_it/run.sh b/examples/003_example/templates/multiply_it/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..71826e6bf2979c96b8268029877d9e0a4b37ff6c --- /dev/null +++ b/examples/003_example/templates/multiply_it/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash +source /afs/cern.ch/eng/tracking-tools/python_installations/miniconda3/bin/activate +python /gpfs/gpfs/gpfs_maestro_home_new/hpc/sterbini/tree_maker/examples/002_example/templates/multiply_it/run.py diff --git a/examples/003_example/templates/multiply_it/run.sub b/examples/003_example/templates/multiply_it/run.sub new file mode 100644 index 0000000000000000000000000000000000000000..0aedcfa7fcc82bd71ff4365241fe0dab62d63984 --- /dev/null +++ b/examples/003_example/templates/multiply_it/run.sub @@ -0,0 +1,12 @@ +#initialdir = . +executable = run.sh +output = .output.txt +error = .err.txt +log = .log.txt +should_transfer_files = yes +when_to_transfer_output = on_exit +transfer_input_files = config.yaml, run.py +# The line below can be commented it necessary +#transfer_output_files = output.yaml ++JobFlavour = "espresso" +queue diff --git a/examples/003_example/templates/sum_it/config.yaml b/examples/003_example/templates/sum_it/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a87ce4d7e300fd2ac7fd0b1b673ff95961a14eea --- /dev/null +++ b/examples/003_example/templates/sum_it/config.yaml @@ -0,0 +1,5 @@ +# This is my input +a: -1 # this is the first element of the sum +b: -1 # this is the second element of the sum +run_command: 'python run.py' +log_file: './log.yaml' diff --git a/examples/003_example/templates/sum_it/postprocess.py b/examples/003_example/templates/sum_it/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..6bfcf2d668b0ac80e71c0bb750859c46f9b77079 --- /dev/null +++ b/examples/003_example/templates/sum_it/postprocess.py @@ -0,0 +1,11 @@ +import glob +import awkward as ak +import numpy as np + +my_folders=sorted(glob.glob('0*')) +my_list=[] +for my_folder in my_folders: + aux=ak.from_parquet(f'{my_folder}/test.parquet') + my_list.append(np.mean(aux)) +aux=ak.Array(my_list) +ak.to_parquet(aux,'./summary.parquet') diff --git a/examples/003_example/templates/sum_it/postprocess.sh b/examples/003_example/templates/sum_it/postprocess.sh new file mode 100755 index 0000000000000000000000000000000000000000..30aa383c284f910819f38241b7c8b6e4609c6d7e --- /dev/null +++ b/examples/003_example/templates/sum_it/postprocess.sh @@ -0,0 +1,4 @@ +#!/bin/bash +#bsub -q hpc_acc -e %J.err -o %J.out cd $PWD && ./run.sh +source /afs/cern.ch/eng/tracking-tools/python_installations/miniconda3/bin/activate +python /gpfs/gpfs/gpfs_maestro_home_new/hpc/sterbini/tree_maker/examples/002_example/templates/sum_it/postprocess.py diff --git a/examples/003_example/templates/sum_it/run.py b/examples/003_example/templates/sum_it/run.py new file mode 100644 index 0000000000000000000000000000000000000000..5458868636ff065a768315943219b9659ed763f7 --- /dev/null +++ b/examples/003_example/templates/sum_it/run.py @@ -0,0 +1,25 @@ +import json +import numpy as np +import ruamel.yaml +import tree_maker + +# load the configuration +with open('config.yaml', 'r') as file: + yaml = ruamel.yaml.YAML() + cfg = yaml.load(file) + +tree_maker.tag_json.tag_it(cfg['log_file'], 'started') + +# define the function (sum of two numbers) +def my_function(my_x, my_y): + 'Just an addition' + return my_x+my_y + +# run the code +result = my_function(cfg['a'], cfg['b']) + +with open('output.yaml', 'w') as fp: + yaml = ruamel.yaml.YAML() + yaml.dump({'result': result}, fp) + +tree_maker.tag_json.tag_it(cfg['log_file'], 'completed') diff --git a/examples/003_example/templates/sum_it/run.sh b/examples/003_example/templates/sum_it/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..33aae7ea3c9abc6666679ebf633b5b2976da9995 --- /dev/null +++ b/examples/003_example/templates/sum_it/run.sh @@ -0,0 +1,4 @@ +#!/bin/bash +#bsub -q hpc_acc -e %J.err -o %J.out cd $PWD && ./run.sh +source /afs/cern.ch/eng/tracking-tools/python_installations/miniconda3/bin/activate +python /gpfs/gpfs/gpfs_maestro_home_new/hpc/sterbini/tree_maker/examples/002_example/templates/sum_it/run.py diff --git a/examples/003_example/templates/sum_it/run.sub b/examples/003_example/templates/sum_it/run.sub new file mode 100644 index 0000000000000000000000000000000000000000..0aedcfa7fcc82bd71ff4365241fe0dab62d63984 --- /dev/null +++ b/examples/003_example/templates/sum_it/run.sub @@ -0,0 +1,12 @@ +#initialdir = . +executable = run.sh +output = .output.txt +error = .err.txt +log = .log.txt +should_transfer_files = yes +when_to_transfer_output = on_exit +transfer_input_files = config.yaml, run.py +# The line below can be commented it necessary +#transfer_output_files = output.yaml ++JobFlavour = "espresso" +queue diff --git a/tree_maker/NodeJob.py b/tree_maker/NodeJob.py index 24dfda856abc945d0f92360aeeeb9d58b01ddb19..f2017184a6f21ebed5e341b0bcf64dc3ef361d74 100644 --- a/tree_maker/NodeJob.py +++ b/tree_maker/NodeJob.py @@ -1,14 +1,16 @@ import anytree # pip install anytree import subprocess -from shutil import copytree +from shutil import copytree, copy import ruamel.yaml # pip install ruamel.yaml import yaml # pip install pyyaml +import json # pip install json from anytree import AnyNode -from anytree.exporter import DictExporter -from anytree.importer import DictImporter +from anytree.exporter import DictExporter, JsonExporter +from anytree.importer import DictImporter, JsonImporter from pathlib import Path import pandas as pd import tree_maker +import os from anytree import AnyNode, NodeMixin, RenderTree class NodeJobBase(object): # Just an example of a base class @@ -49,15 +51,16 @@ class NodeJob(NodeJobBase, NodeMixin): # Add Node feature def clone(self): if not self.template_path==None: - copytree(self.template_path, child.path) + copytree(self.template_path+'/config.yaml', child.path) else: subprocess.call(f'mkdir {self.path}', shell=True) - self.to_yaml() + self.to_json() def clone_children(self): for child in self.children: - copytree(child.template_path, child.path) - child.to_yaml() + os.makedirs(child.path, exist_ok=True) + copy(child.template_path+'/config.yaml', child.path+'/config.yaml') + child.to_json() def rm_children_folders(self,): for child in self.children: @@ -102,6 +105,12 @@ class NodeJob(NodeJobBase, NodeMixin): # Add Node feature with open(f"{self.path}/{filename}", "w") as file: yaml.dump(DictExporter().export(self), file) + def to_json(self, filename='tree.json'): + if not Path(self.path).is_dir(): + subprocess.call(f'mkdir {self.path}', shell=True) + with open(f"{self.path}/{filename}", "w") as file: + file.write(JsonExporter(indent=2, sort_keys=True).export(self)) + def generation(self, number): return [ii for ii in anytree.search.findall(self, filter_=lambda node: node.depth==number)] @@ -114,20 +123,22 @@ class NodeJob(NodeJobBase, NodeMixin): # Add Node feature return False def has_been(self, tag): - if self._is_logging_file(): - my_df= pd.DataFrame(tree_maker.from_yaml(self.log_file)).transpose() - if tag in my_df['tag'].values: - return True - else: - return False + #if self._is_logging_file(): + if tag in tree_maker.from_json(self.log_file).keys(): + return True else: - return False + return False + #else: + # return False def has_not_been(self, tag): return not self.has_been(tag) def tag_as(self, tag): - tree_maker.tag.tag_it(self.log_file, tag) + ''' + This is to tag the node's activity. + ''' + tree_maker.tag_json.tag_it(self.log_file, tag) def find(self, **kwargs): return anytree.search.findall(self,**kwargs) @@ -153,15 +164,3 @@ class NodeJob(NodeJobBase, NodeMixin): # Add Node feature self.tag_as('submitted') self.submit() - - def sum_is_five(x, y): - ''' - this is an example function to test wether the pytest is working - ''' - try: - z = 5 - assert x + y == z - return True - except: - return False - diff --git a/tree_maker/__init__.py b/tree_maker/__init__.py index c34b5ca1493dc12266b869ded838776a59270a31..fa42a9e051012c1a45dfb5054efbdeee18d09f26 100644 --- a/tree_maker/__init__.py +++ b/tree_maker/__init__.py @@ -8,8 +8,11 @@ __version__ = "0.0.1" from .NodeJob import NodeJob from .general import tree_from_yaml +from .general import tree_from_json from .general import from_yaml +from .general import from_json from .tag import * +from .tag_json import * diff --git a/tree_maker/general.py b/tree_maker/general.py index 4e2c7dfc85bad2c72c8df5111de3f2b9f78b181c..f7f8ce0e17b74a341b37a6aea4faa17c8b15f64a 100644 --- a/tree_maker/general.py +++ b/tree_maker/general.py @@ -4,6 +4,8 @@ from anytree.importer import DictImporter from tree_maker import NodeJob import yaml # pip install pyyaml import ruamel.yaml +import json +import orjson ryaml = ruamel.yaml.YAML() @@ -11,6 +13,9 @@ def tree_from_yaml(filename='tree.yaml'): with open(filename, "r") as file: return DictImporter(nodecls=NodeJob).import_(yaml.load(file, Loader=yaml.FullLoader)) +def tree_from_json(filename='tree.json'): + with open(filename, "r") as file: + return DictImporter(nodecls=NodeJob).import_(orjson.loads(file.read())) def from_yaml(filename): try: @@ -18,4 +23,12 @@ def from_yaml(filename): return ryaml.load(file) except Exception as e: print(e) - return {} \ No newline at end of file + return {} + +def from_json(filename, verbose=False): + try: + with open(filename, 'r') as file: + return orjson.loads(file.read()) + except Exception as e: + if verbose: print(e) + return {} diff --git a/tree_maker/repository.py b/tree_maker/repository.py new file mode 100644 index 0000000000000000000000000000000000000000..8e0203f1875aa91c7bb3591ddf537cbd5f624371 --- /dev/null +++ b/tree_maker/repository.py @@ -0,0 +1,97 @@ +# conda env export -n base > environment.yml +# conda env create --prefix /home/HPC/sterbini/test1 -f environment.yml +# import git +import os +import subprocess +from pathlib import Path + +# g = git.cmd.Git(git_dir) +# g.pull() + +# https://stackoverflow.com/questions/14989858/ +def get_hash(repo): + #os.chdir(str(Path(repo).expanduser())) + return subprocess.check_output([f'git', '-C', f'{str(Path(repo).expanduser())}', + 'rev-parse', 'HEAD'])[0:-1].decode("utf-8") + + +def create_base(my_base, my_list, verbose=False): + for ii in my_list: + print(f'**** {ii["repo"]} to {ii["folder_name"]} ****') + if Path(ii["folder_name"]).is_dir(): + if verbose: + print("The folder exists.") + try: + os.system(f'git -C {ii["folder_name"]} pull') + except Exception as e: + print(e) + else: + if verbose: + print("The folder does not exist exists.") + try: + os.system(f'git clone --depth=1 {ii["repo"]} {ii["folder_name"]}') + except Exception as e: + print(e) + print("") + + +if __name__ == "__main__": + my_base = Path('~').expanduser() / "base" + my_list = [ + { + "repo": "http://github.com/lhcopt/lhcmask", + "folder_name": str(my_base / "tracking-tools/modules"), + }, + { + "repo": "http://github.com/lhcopt/lhcerrors", + "folder_name": str(my_base / "tracking-tools/errors"), + }, + { + "repo": "http://github.com/lhcopt/lhcmachines", + "folder_name": str(my_base / "tracking-tools/machines"), + }, + { + "repo": "http://github.com/lhcopt/lhctoolkit", + "folder_name": str(my_base / "tracking-tools/tools"), + }, + { + "repo": "http://github.com/lhcopt/beambeam", + "folder_name": str(my_base / "tracking-tools/beambeam_macros"), + }, + { + "repo": "http://github.com/lhcopt/hllhc13", + "folder_name": str(my_base / "lhc/optics/HLLHCV1.3"), + }, + { + "repo": "http://github.com/lhcopt/hllhc14", + "folder_name": str(my_base / "lhc/optics/HLLHCV1.4"), + }, + { + "repo": "http://github.com/lhcopt/hllhc15", + "folder_name": str(my_base / "lhc/optics/HLLHCV1.5"), + }, + { + "repo": "http://github.com/lhcopt/lhcrunIII", + "folder_name": str(my_base / "lhc/optics/runIII"), + }, + { + "repo": "http://github.com/lhcopt/lhc2018", + "folder_name": str(my_base / "lhc/optics/runII/2018"), + }, + { + "repo": "https://github.com/SixTrack/pysixtrack", + "folder_name": str(my_base / "tracking-tools/python_installations/pysixtrack"), + }, + { + "repo": "https://github.com/SixTrack/sixtracktools", + "folder_name": str(my_base + / "tracking-tools/python_installations/sixtracktools"), + }, + { + "repo": "https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker", + "folder_name": str(my_base / "tracking-tools/python_installations/tree_maker"), + }, + ] + create_base(my_base, my_list, True) + for ii in my_list: + ii['hash'] = get_hash(ii['folder_name']) diff --git a/tree_maker/tag_json.py b/tree_maker/tag_json.py new file mode 100644 index 0000000000000000000000000000000000000000..c3a0d9313e1ea39a79efceaf69cd7c914b173742 --- /dev/null +++ b/tree_maker/tag_json.py @@ -0,0 +1,102 @@ +# From Hamish Graham + +import json +import datetime + +# load the configuration +def read_json(myfile, verbose=False): + """ + Read a json file and convert it into python. + + Example + -------- + >>> read_json('mytest.json') + + This will return the contents inside of a json file. + """ + try: + with open(myfile, 'r') as file: + my_dict = json.load(file) + return my_dict + except FileNotFoundError: + if verbose: print('New file created.') + my_dict = {} + return my_dict + except Exception as e: + if verbose: print(e.__class__) + return None + + +def write_json(my_dict, myfile): + """ + Convert a dictionary into a json file. + + Examples + -------- + >>> write_json({'green': 'hello'}, ('mytest2.json')) + + This will add {'green': 'hello'} as a dictionary to the ('mytest2.json') file. + """ + with open(myfile, 'w') as file: + json.dump(my_dict, file, indent=2) + + +def append_json(my_dict, myfile): + ''' + Append dictionaries to a json file. + + Examples + -------- + >>> append_json({'blue': 'bonjour'}, ('mytest2.json')) + + This will append {'blue': 'bonjour'} to an existing json file: ('mytest2.json') + ''' + + try: + with open(myfile, "r") as file: + data = json.load(file) + data.update(my_dict) + except: + data=my_dict + write_json(data, myfile) + +def get_last_stage(myfile, verbose=False): + """ + get_last_stage is to read a json file and to return the number of the last dictionary + + Examples + -------- + >>> get_last_stage('myfile', verbose=True) + + If the get_last_stage has two dictionaries: labeled '0' and '1' it will return the '1', the last stage. + """ + my_dict=read_json(myfile, verbose) + try: + return int(list(my_dict.keys())[-1])+1 + except IndexError: + if verbose: print('IndexError, I consider 0 as first item') + return 0 + except Exception as e: + if verbose: print(e.__class__) + return 0 + + +def tag_it(myfile, mycomment): + """ + Create timestamps and add them to a json file. + + Examples + -------- + >>> tag_it('myfile', 'hello') + + This will create a a human readable and a unix time stamp and append that to 'myfile', + including the comment 'hello' + """ + #stage = get_last_stage(myfile) + #my_dict = {stage: {}} + #my_dict[stage]['tag'] = mycomment + my_now=datetime.datetime.now() + #my_dict[stage]['unix_time'] = int(1e9*my_now.timestamp()) #in nanoseconds + #my_dict[stage]['human_time'] = str(my_now) + #append_json(my_dict, myfile) + append_json({mycomment: int(1e9*my_now.timestamp())}, myfile)