From 7662fef44b5c1bcd39ba7e929617d8bed0d50f71 Mon Sep 17 00:00:00 2001
From: Guido Sterbini <guido.sterbini@cern.ch>
Date: Fri, 18 Jun 2021 10:06:25 +0000
Subject: [PATCH] polishing

---
 examples/001_example/001.ipynb | 193 +++++++++++-----------
 examples/001_example/001.py    | 290 ++++++++++++++++++++++-----------
 2 files changed, 293 insertions(+), 190 deletions(-)

diff --git a/examples/001_example/001.ipynb b/examples/001_example/001.ipynb
index 52872a3..e4c0fd1 100644
--- a/examples/001_example/001.ipynb
+++ b/examples/001_example/001.ipynb
@@ -2,11 +2,13 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "fc33802d",
+   "id": "27c7661c",
    "metadata": {},
    "source": [
     "### Introduction\n",
     "\n",
+    "See https://codimd.web.cern.ch/p/0QX9ebi1bn#/ for the latest version.\n",
+    "\n",
     "Our community is often confronted with the need of running complex algorithms for a set of different input.\n",
     "E.g. a DA computation with tune scan + beam-beam + errors.\n",
     "\n",
@@ -34,7 +36,7 @@
     " \n",
     "The main idea is to downscale the problem of a generic graph to a simpler graph, a **tree**.\n",
     "\n",
-    "A **tree** is a simplified **DAG** (Directed Acycled Graphs) where each node can have maximum one parent.\n",
+    "A **tree** is a simplified [**DAG**](https://en.wikipedia.org/wiki/Directed_acyclic_graph) (Directed Acycled Graphs) where each node can have maximum one parent.\n",
     "The tree is convenient since it can be directly mapped into a file system (the folder stucture of a file system is a tree).\n",
     "\n",
     "In python a tree can be represented, for example, with the `anytree` package (see [000_example](https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker/-/blob/master/examples/000_example/000.ipynb)). \n",
@@ -52,9 +54,7 @@
     "6. has a specific log file, `NodeJob.log_path`\n",
     "\n",
     "\n",
-    "In this way we can factorize the physics (the template), the parameters (the dictionary), the folder (JobNode.path) but maintaining for all nodes the very same interface (`JobNode`).\n",
-    "\n",
-    "The users should spend 99% of their time on the physics (the templates), and use the package to build/orchestrate the tree.\n",
+    "The users should spend 99% of their time on the physics (the templates, each template is well \"isolated\" for a deep understanding of its physics), and use the package to build/orchestrate the tree.\n",
     "\n",
     "#### Building of the tree\n",
     "The building of the tree is done in three steps:\n",
@@ -69,6 +69,8 @@
     "One can orchestrate the simulation but writing and reading in the different log.\n",
     "\n",
     "We will show now a simple example to clarify all these ingredients.\n",
+    "In this way we can factorize the physics (the template), the parameters (the dictionary), the folder (JobNode.path) but maintaining for all nodes the very same interface (`JobNode`).\n",
+    "\n",
     "\n",
     "\n",
     "\n",
@@ -93,8 +95,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
-   "id": "0df4bfee",
+   "execution_count": 1,
+   "id": "f40cbd4a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -104,8 +106,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
-   "id": "97a88117",
+   "execution_count": 2,
+   "id": "67a6600b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -129,7 +131,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "54554408",
+   "id": "65dff38c",
    "metadata": {},
    "source": [
     "#### The root of the tree "
@@ -137,8 +139,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
-   "id": "f4d2df7f",
+   "execution_count": 3,
+   "id": "71128392",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -151,7 +153,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "c372b59e",
+   "id": "80c3c511",
    "metadata": {},
    "source": [
     "#### First generation of nodes"
@@ -159,8 +161,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 69,
-   "id": "7b62b304",
+   "execution_count": 4,
+   "id": "710b08d0",
    "metadata": {},
    "outputs": [
     {
@@ -198,7 +200,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "9d0a877c",
+   "id": "6aa7ce84",
    "metadata": {},
    "source": [
     "#### Second generation of nodes"
@@ -206,8 +208,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
-   "id": "b5234f82",
+   "execution_count": 5,
+   "id": "a2299ffd",
    "metadata": {},
    "outputs": [
     {
@@ -246,7 +248,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "1656c5e6",
+   "id": "f5d19174",
    "metadata": {},
    "source": [
     "#### Third generation of nodes"
@@ -254,8 +256,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
-   "id": "ff460439",
+   "execution_count": 6,
+   "id": "8fa89652",
    "metadata": {},
    "outputs": [
     {
@@ -302,8 +304,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
-   "id": "929dd35f",
+   "execution_count": 7,
+   "id": "b9c3c5d4",
    "metadata": {},
    "outputs": [
     {
@@ -312,7 +314,7 @@
        "'python run.py'"
       ]
      },
-     "execution_count": 72,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -324,8 +326,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
-   "id": "316735a3",
+   "execution_count": 8,
+   "id": "21e3a028",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -339,8 +341,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
-   "id": "94f95902",
+   "execution_count": 9,
+   "id": "688a31ad",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -350,7 +352,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e7348bdc",
+   "id": "b875cce1",
    "metadata": {},
    "source": [
     "### Cloning the templates of the nodes\n",
@@ -359,8 +361,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
-   "id": "5517d98c",
+   "execution_count": 10,
+   "id": "7387623d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -376,7 +378,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "4f94cc59",
+   "id": "1bf865c4",
    "metadata": {},
    "source": [
     "### Launching the jobs"
@@ -384,8 +386,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
-   "id": "2bfd5442",
+   "execution_count": 11,
+   "id": "83809e16",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -396,8 +398,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 83,
-   "id": "61daaf10",
+   "execution_count": 12,
+   "id": "3b59d004",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -407,8 +409,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
-   "id": "fcd92d8c",
+   "execution_count": 13,
+   "id": "cceccd30",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -418,8 +420,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
-   "id": "60197db7",
+   "execution_count": 14,
+   "id": "bfe59b42",
    "metadata": {},
    "outputs": [
     {
@@ -439,7 +441,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "1679f68c",
+   "id": "aabde768",
    "metadata": {},
    "source": [
     "### Post-processing"
@@ -447,8 +449,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
-   "id": "f5bbc785",
+   "execution_count": 15,
+   "id": "d74f9767",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -461,8 +463,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
-   "id": "601e756a",
+   "execution_count": 16,
+   "id": "b2c58ea3",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -472,17 +474,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
-   "id": "4759a59b",
+   "execution_count": 17,
+   "id": "a683bc17",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.40196317514377394"
+       "0.31392093032473517"
       ]
      },
-     "execution_count": 91,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -494,7 +496,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e1d31da8",
+   "id": "18497c01",
    "metadata": {},
    "source": [
     "### Monitoring "
@@ -502,8 +504,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
-   "id": "6ca20c0d",
+   "execution_count": 18,
+   "id": "4f1d2724",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -512,101 +514,96 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 112,
-   "id": "bee058fe",
+   "execution_count": 19,
+   "id": "5e4a7b91",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000\n",
-      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001\n",
-      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002\n",
-      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/003\n"
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000/001\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001/001\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002/001\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/003/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/003/001\n"
      ]
     }
    ],
    "source": [
     "# checking the status\n",
-    "for node in root.find(filter_= lambda node: node.depth==1 and \n",
-    "                                   node.has_been('completed')):\n",
-    "    print(node.path)"
+    "my_filter = lambda node: node.depth==2 and node.has_been('completed')\n",
+    "for node in root.descendants:\n",
+    "    if my_filter(node):\n",
+    "        print(node.path)\n",
+    "        \n",
+    "# one can also use root.find(filter_= lambda node: node.depth==1 and node.has_been('completed'))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 113,
-   "id": "342f1b76",
+   "execution_count": 20,
+   "id": "94792469",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000/000/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000/001/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001/000/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001/001/000\n",
       "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002/000/000\n",
       "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002/001/000\n",
-      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/003/000/000\n"
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/003/001/000\n"
      ]
     }
    ],
    "source": [
     "def my_test(node):\n",
     "    output = tree_maker.from_yaml(node.path+'/output.yaml')\n",
-    "    return output['result']<1.2\n",
+    "    return node.is_leaf and node.has_been('completed') and output['result']<1.2\n",
     "\n",
-    "for node in root.find(filter_=lambda node: node.is_leaf and \n",
-    "                                           node.has_been('completed') and \n",
-    "                                           my_test(node)):\n",
-    "    print(node.path)"
+    "for node in root.descendants:\n",
+    "    if my_test(node):\n",
+    "        print(node.path) "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 116,
-   "id": "c93a703a",
+   "execution_count": 21,
+   "id": "60df3b11",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[3]\n"
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000/000/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/000/001/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001/000/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/001/001/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002/000/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/002/001/000\n",
+      "/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/003/001/000\n"
      ]
     }
    ],
    "source": [
-    "a=np.array([3])\n",
-    "b=a.copy()\n",
-    "b[0]=2\n",
-    "print(a)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 118,
-   "id": "20931e84",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'13'"
-      ]
-     },
-     "execution_count": 118,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "'1'+'3'"
+    "#or (better)\n",
+    "for node in root.generation(3):\n",
+    "    if my_test(node):\n",
+    "        print(node.path)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "2dda51e8",
+   "id": "57441661",
    "metadata": {},
    "outputs": [],
    "source": []
diff --git a/examples/001_example/001.py b/examples/001_example/001.py
index 92ab155..e18ddc0 100644
--- a/examples/001_example/001.py
+++ b/examples/001_example/001.py
@@ -1,15 +1,91 @@
 # %%
 """
+### Introduction
+
+See https://codimd.web.cern.ch/p/0QX9ebi1bn#/ for the latest version.
+
+Our community is often confronted with the need to run complex algorithms for a set of different inputs.
+E.g. a DA computation with tune scan + beam-beam + errors.
+
+This implies staging the algorithm in different steps corresponding, sometimes, to different codes (MADX, SixTrack,...) and/or different hardware (local CPU, GPU, HTCondor/LSF clusters, BOINC...).
+
+The topic of this brainstorming is to discuss a python package that could convey a **standard** approach in order to
+
+- avoid re-inventing the wheel each time, 
+- improve the way we share our workflows for the different simulations,
+- provide a standard way to babysit the simulations and postprocess the output.
+
+Clearly the package can be integrated with other solutions (see next [presentation]()).
+
+The challenge here is to maintain a good balance between simplicity (to be user-friendly) and flexibility (to cover a large gamut of use cases).
+
+You can find a proposal at https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker.
+We will first present its rationale (a bit abstract, 5 min) and then explore together a simple example (pragmatic and complementary to the first part, 15 min).
+
+
+### Rationale
+
+The general way to describe our problem (running a staged algorithm for a set of different inputs) is to associate a **job** with each stage and input.
+
+A job can be represented as a **node** in a **graph** (nodes connected with edges).
+ 
+The main idea is to downscale the problem of a generic graph to a simpler graph, a **tree**.
+
+A **tree** is a simplified [**DAG**](https://en.wikipedia.org/wiki/Directed_acyclic_graph) (Directed Acyclic Graph) where each node can have at most one parent.
+The tree is convenient since it can be directly mapped into a file system (the folder structure of a file system is a tree).
+
+In python a tree can be represented, for example, with the `anytree` package (see [000_example](https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker/-/blob/master/examples/000_example/000.ipynb)). 
+
+The `AnyNode` object of the `anytree` package can be generalized to any class.
+Indeed we generalized it to our `NodeJob` class, inheriting all the methods/attributes of `AnyNode`, e.g., root, parent, children, ancestors, siblings, leaves, depth, height, searching/filtering methods...
+
+The main idea is that each node of our simulation tree
+
+1. is an instance of the `NodeJob` class (extending `anytree`).
+2. refers to a **template node** (for example, a MadX mask): `NodeJob.template_path`
+3. has a specific dictionary of inputs, `NodeJob.dictionary`
+4. is mapped to a file system, `NodeJob.path`
+5. has a specific submit command, `NodeJob.submit_command`
+6. has a specific log file, `NodeJob.log_path`
+
+
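+A minimal sketch of a single node carrying these attributes (the attribute names follow
+the example further below; the values here are purely illustrative):
+
+    node = NodeJob(name='000',
+                   parent=root,
+                   path=f"{root.path}/000",
+                   template_path=root.template_path+'/sum_it',
+                   submit_command='python run.py',
+                   log_file=f"{root.path}/000/log.yaml",
+                   dictionary={'a': 1.0, 'b': 2.0})
+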
+Users should spend 99% of their time on the physics (the templates; each template is well "isolated" so that its physics can be understood in depth), and use the package to build/orchestrate the tree.
+
+#### Building of the tree
+The building of the tree is done in three steps:
+- instantiating the nodes
+- **cloning** (i.e. copying) the templates into the NodeJob.path
+- **mutating** (i.e. changing) the input of the template with the info in the NodeJob.dictionary
+
+
+#### Orchestrating the tree
+
+Each node can be run (via NodeJob.submit_command) and logged (via NodeJob.log_file).
+One can orchestrate the simulation by writing to and reading from the different logs.
+
+We will now show a simple example to clarify all these ingredients.
+In this way we can factorize the physics (the template), the parameters (the dictionary) and the folder (NodeJob.path) while maintaining for all nodes the very same interface (`NodeJob`).
+
+
+
+
+### Simple example ([001_example](https://gitlab.cern.ch/abpcomputing/sandbox/tree_maker/-/blob/master/examples/001_example/001.ipynb))
+
+
 Let us assume that we need to perform this computation
 
 $\sqrt{|(a+b)\times c|}$
 
-and we want to compute the standard deviation of the result assuming that a, b and c are normal distributed independent variables. Clearly the problem is quite naive but we want to address is as if we will need a cluster to solve it. 
+and we want to compute the standard deviation of the result assuming that a, b and c are normally distributed independent variables. Clearly the problem is quite naive, but we want to address it as if we needed a cluster to solve it.
+
+For example, we can partition the problem in three consecutive stages:
 
-We can partition the problem in a three conscutive operations
 1. A sum: $(a+b)$
 2. A multiplication of the result 1 with c: $(a+b)\times c$
 3. A sqrt of the result of 2: $\sqrt{|(a+b)\times c|}$
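+
+For a single draw, say $a=0.5$, $b=-1.0$ and $c=2.0$, the three stages give $-0.5$, then $-1.0$ and finally $\sqrt{|-1.0|}=1$.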
+
+For each stage we build a template.
+Documentation (still at an early stage; you need to be on the CERN GPN) can be found at https://acc-py.web.cern.ch/gitlab/abpcomputing/sandbox/tree_maker/docs/master/.
 """
 
 # %%
@@ -17,6 +93,13 @@ import tree_maker
 from tree_maker import NodeJob
 
 # %%
+# Clearly for this easy task one can do everything in the very same python kernel,
+# BUT here we want to mimic the typical flow:
+# 1. MADX for optics matching/error seeding
+# 2. Tracking for FMA and or DA studies
+# 3. simulation baby-sitting and
+# 4. postprocessing
+
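+# Reference computation done directly in this kernel: my_list_original is used
+# later as a sanity check of the results produced through the tree.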
 import numpy as np
 a=np.random.randn(4)
 b=np.random.randn(4)
@@ -27,162 +110,185 @@ for ii in c:
     my_list_original+=list(np.sqrt(np.abs((a+b)*ii)))
 my_list_original=sorted(my_list_original)
 
+# %%
+"""
+#### The root of the tree 
+"""
+
 # %%
 #root
 root = NodeJob(name='root', parent=None)
-# to be modified accordingly
 root.path = '/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000'
-root.dictionary = {'log_file': f"{root.path}/log.yaml"}
-root.clean_log()
+root.template_path = root.path + '/../templates'
+root.log_file = root.path + "/log.yaml"
+
+# %%
+"""
+#### First generation of nodes
+"""
 
+# %%
 #first generation
-for node in root.leaves:
+for node in root.root.generation(0):
     node.children=[NodeJob(name=f"{child:03}",
                            parent=node,
-                           path = f"{node.path}/{child:03}",
-                           template_path = root.path+'/../templates/sum_it',
-                           run_command = f'python run.py',
+                           path=f"{node.path}/{child:03}",
+                           template_path = root.template_path+'/sum_it',
+                           submit_command = f'python run.py',
+                           log_file=f"{node.path}/{child:03}/log.yaml",
                            dictionary={'a':float(a[child]), 
-                                       'b':float(b[child]),
-                                       'log_file': f"{node.path}/{child:03}/log.yaml"
+                                       'b':float(b[child])
                                       })
                    for child in range(len(a))]
 
+# To combine different lists one can use the product or the zip functions    
+#import itertools
+#[[i, j, z] for i, j, z in itertools.product(['a','b'],['c','d'],[1,2,3])]
+#[[i, j, z] for i, j, z in zip(['a','b'],['c','d'],[1,2,3])]
+root.print_it()
+
+# %%
+"""
+#### Second generation of nodes
+"""
+
+# %%
 #second generation
-for node in root.leaves:
+for node in root.root.generation(1):
     node.children=[NodeJob(name=f"{child:03}",
                            parent=node,
                            path = f"{node.path}/{child:03}",
-                           template_path = root.path+'/../templates/multiply_it',
-                           run_command = f'python run.py',
-                           dictionary={'c':float(c[child]),
-                                       'log_file': f"{node.path}/{child:03}/log.yaml",
-                                      })
+                           template_path = root.template_path+'/multiply_it',
+                           submit_command = f'python run.py',
+                           log_file=f"{node.path}/{child:03}/log.yaml",
+                           dictionary={'c': float(c[child])})
                    for child in range(len(c))]
-    
+root.print_it()
+
+# %%
+"""
+#### Third generation of nodes
+"""
+
+# %%
 #third generation
-for node in root.leaves:
+for node in root.root.generation(2):
     node.children=[NodeJob(name=f"{child:03}",
                            parent=node, 
                            path = f"{node.path}/{child:03}",
-                           template_path = root.path+'/../templates/square_root_it',
-                           run_command = f'python run.py',
-                           dictionary={'a':float(c[child]),
-                                       'log_file': f"{node.path}/{child:03}/log.yaml",
-                                       'test': {'guido':4}
-                                      })
+                           template_path = root.template_path+'/square_root_it',
+                           submit_command = f'python run.py',
+                           log_file=f"{node.path}/{child:03}/log.yaml",
+                           dictionary={'log_file': f"{node.path}/{child:03}/log.yaml"})
                            for child in range(1)]
-    
-root.to_yaml()
+root.print_it()
+
+# %%
+# we can inspect the data structure
+root.children[3].children[1].children[0].submit_command
 
 # %%
+# or we can modify the attributes of the tree
 if False:
     for i, node in enumerate(root.leaves):
         if i>3:
             print(i)
-            node.run_command = f'condor_submit run.sub -batch-name square_root'
-
-# %%
-root.print_it()
+            node.submit_command = f'condor_submit run.sub -batch-name square_root'
 
 # %%
-# save the tree
+# we can transfer the information of the tree into a yaml file for later orchestration
 root.to_yaml()
 
 # %%
-root.path
+"""
+### Cloning the templates of the nodes
+From python objects we move the nodes to the file-system.
+"""
 
 # %%
-# Load the tree from a yaml
-root=tree_maker.tree_from_yaml(f'{root.path}/tree.yaml')
-root.print_it()
+# We map the pythonic tree into a >folder< tree
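+# (clone_children copies the template folder, NodeJob.template_path, into each
+#  child's NodeJob.path, i.e. the "cloning" step described in the introduction)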
+root.clean_log()
+root.rm_children_folders()
+for depth in range(root.height):
+    [x.clone_children() for x in root.generation(depth)]
 
-# %%
-root.children[3].children[1].children[0].run_command
+# VERY IMPORTANT, tagging
+root.tag_as('cloned')
 
 # %%
-# STEP 1 cloning
-root.tag_it('starting cloning')
-
-root.rm_children()
-[x.clone_children() for x in root.generation(0)]
-[x.clone_children() for x in root.generation(1)]
-[x.clone_children() for x in root.generation(2)]
-
-root.tag_it('cloned')
+"""
+### Launching the jobs
+"""
 
 # %%
-root.tag_it('launched')
+root.tag_as('launched')
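+# cleanlog_mutate_submit presumably cleans the node log, "mutates" the cloned
+# template with the node dictionary and submits the job via NodeJob.submit_command.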
 for node in root.generation(1):
-    node.clean_log()
-    
-    node.tag_it('Before mutation')
-    node.mutate()
-    node.tag_it('After mutation')
-    
-    node.tag_it('Before submission')
-    node.run()
-    node.tag_it('After submission')
+    node.cleanlog_mutate_submit()
 
 # %%
 for node in root.generation(2):
-    node.clean_log()
-
-    node.tag_it('Before mutation')
-    parent_output = tree_maker.from_yaml(node.parent.path+'/output.yaml')
-    node.dictionary['sum_a_b']=parent_output['result']
-    node.mutate()
-    node.tag_it('After mutation')
-
-    node.tag_it('Before submission')
-    node.run()
-    node.tag_it('After submission')
+    node.cleanlog_mutate_submit()
 
 # %%
 for node in root.generation(3):
-    node.clean_log()
+    node.cleanlog_mutate_submit()
 
-    node.tag_it('Before mutation')
-    parent_output = tree_maker.from_yaml(node.parent.path+'/output.yaml')
-    node.dictionary['a']=parent_output['result']
-    node.mutate()
-    node.tag_it('After mutation')
+# %%
+# check if all root descendants are completed 
+if all([descendant.has_been('completed') for descendant in root.descendants]):
+    root.tag_as('completed')
+    print('All jobs are completed!')
 
-    node.tag_it('Before submission')
-    node.run()
-    node.tag_it('After submission')
+# %%
+"""
+### Post-processing
+"""
 
 # %%
+# retrieve the output
 my_list=[]
-for node in root.generation(3):
+for node in root.leaves:
     output = tree_maker.from_yaml(node.path+'/output.yaml')
     my_list.append(output['result'])
 
 # %%
+# sanity check
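+# (the assert passes only if every sorted tree result matches the reference exactly,
+#  i.e. any(difference)==0)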
 assert any(np.array(sorted(my_list))-np.array(my_list_original))==0
 
 # %%
-root=tree_maker.tree_from_yaml(f'/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/tree.yaml')
+# std of the results
+np.std(my_list)
 
 # %%
-for child in root.generation(2):
-    if child.is_tag('started'):
-        print(child.path)
+"""
+### Monitoring 
+"""
 
 # %%
-for child in root.generation(3):
-    if child.is_tag('completed'):
-        print(child.path)
+root=tree_maker.tree_from_yaml(f'/home/jovyan/local_host_home/CERNBox/2021/tree_maker/examples/001_example/study_000/tree.yaml')
 
 # %%
-if any([descendant.is_tag('completed') for descendant in root.descendants]):
-    root.tag_it('completed')
+# checking the status
+my_filter = lambda node: node.depth==2 and node.has_been('completed')
+for node in root.descendants:
+    if my_filter(node):
+        print(node.path)
+        
+# one can also use root.find(filter_=lambda node: node.depth==2 and node.has_been('completed'))
 
 # %%
+def my_test(node):
+    output = tree_maker.from_yaml(node.path+'/output.yaml')
+    return node.is_leaf and node.has_been('completed') and output['result']<1.2
+
+for node in root.descendants:
+    if my_test(node):
+        print(node.path) 
 
-#def run_HTCondor(self):
-#    import subprocess
-#    print('Launching on HTCondor')
-#    subprocess.run(f'cd {self.path}; condor_submit run.sub;')
-#NodeJob.run_HTCondor=run_HTCondor
+# %%
+#or (better)
+for node in root.generation(3):
+    if my_test(node):
+        print(node.path)
 
+# %%
-- 
GitLab