From 0ce0355068cb6ab10168a11f732c110ee1b9abb6 Mon Sep 17 00:00:00 2001
From: Thomas van Vark <thomas.van.vark@cern.ch>
Date: Mon, 8 Jan 2024 09:13:17 +0100
Subject: [PATCH] Replaced job system

---
 .gitignore               |  3 ++-
 Dockerfile               | 10 +++++++---
 README.md                | 20 ++++++++++++++++++++
 jobs.yaml                |  6 ++++++
 jobs/example.py          | 21 +++++++++++++++++++++
 main.py => jobs/utils.py | 10 ++--------
 root                     |  7 +++----
 run.sh                   |  6 ++++++
 8 files changed, 67 insertions(+), 16 deletions(-)
 create mode 100644 jobs.yaml
 create mode 100644 jobs/example.py
 rename main.py => jobs/utils.py (75%)
 create mode 100644 run.sh

diff --git a/.gitignore b/.gitignore
index c5ab79b..3df3924 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 venv
 __pycache__/
-build.sh
\ No newline at end of file
+build.sh
+docker-env.txt
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index d899251..cc958fa 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,16 @@
 FROM python:3.10-alpine
 
 WORKDIR /app
-COPY requirements.txt main.py ./
+
+COPY requirements.txt ./
 RUN pip install -r requirements.txt
 
-RUN apk update && apk add openrc
+COPY ./jobs ./jobs
+COPY jobs.yaml run.sh ./
+RUN chmod +x /app/run.sh
+
+RUN apk update && apk add openrc bash yq
 RUN rc-service /usr/sbin/crond start && rc-update add /usr/sbin/crond
-RUN ls /etc/crontabs/root
 COPY root /etc/crontabs/root
 
 CMD ["/usr/sbin/crond", "-f"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 9c6af2d..b5d78af 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,26 @@ A scheduled updater that retrieves data from sources and sends the data over to
 
 This updater will run scripts daily, weekly and monthly etc. based on cron jobs.
 
+## What is the idea?
+The idea is to move the responsibility for storing the scripts from the data-management-api to GitLab itself. When we designed the concept of running the scripts using `subprocess`, we were not sure which dependencies the jobs would need or how they would affect this process. The new design prevents any issues with dependencies that cannot be imported.
+
+Given the possible security concerns, this proposal moves the scripts to GitLab. This also brings the added benefit of everything GitLab has to offer, such as the history of the scripts and ways to implement "approval" of scripts.
+
+## How to make a new job?
+1. Clone the repo
+2. Create a new branch with a logical name, like the parameter you're updating, and check it out
+3. Copy the `example.py` file in `jobs` and edit it to suit your needs. Make sure to edit the `path` in the `update()` function to point to the parameter you want to update.
+4. Edit `jobs.yaml` and add the name of your file (without the `.py` extension) under whatever frequency you need.
+5. Push your changes to the repo in the new branch
+6. Create a merge request
+
+
+## Process explained
+1. The container gets built and installs everything in `requirements.txt`
+2. The container is run and cron is started to trigger `run.sh` with the specified frequency.
+3. `run.sh` loops over the array of filenames provided in `jobs.yaml` and runs `python <file>` on them
+4. Each file ends up calling `update()`, which in turn logs in to the API using Azure AD and sends over the data that should be updated on the given parameter.
+
 ## Installation
 
 ```bash
diff --git a/jobs.yaml b/jobs.yaml
new file mode 100644
index 0000000..d9bb530
--- /dev/null
+++ b/jobs.yaml
@@ -0,0 +1,6 @@
+hourly:
+  - example
+daily:
+  - example
+weekly:
+  - example
\ No newline at end of file
diff --git a/jobs/example.py b/jobs/example.py
new file mode 100644
index 0000000..087a911
--- /dev/null
+++ b/jobs/example.py
@@ -0,0 +1,21 @@
+from utils import update
+data = "bar!"
+
+# Job template: compute the value to send in `data`, then pass it to update().
+
+"""
+    The path to the parameter that you want to update is to be indicated
+    in the following format:
+    "path.to.parameter"
+
+    This edits:
+    {
+        path: {
+            to: {
+                parameter: <------------
+            }
+        }
+    }
+"""
+
+update("foo", data)  # "foo" is the parameter path; replace it with your own "path.to.parameter"
diff --git a/main.py b/jobs/utils.py
similarity index 75%
rename from main.py
rename to jobs/utils.py
index d5f1040..f0dda38 100644
--- a/main.py
+++ b/jobs/utils.py
@@ -1,4 +1,3 @@
-import sys
 import os
 import requests
 import json
@@ -22,11 +21,6 @@ def login():
     return response.json()["access_token"]
 
 
-def push_update(frequency, token):
-    response = requests.post(f"{host}/update/{frequency}", headers={"Authorization": f"Bearer {token}"})
+def update(parameter_path, data):  # POST `data` to {host}/update/<parameter_path> with a fresh login() token, then print the JSON reply
+    response = requests.post(f"{host}/update/{parameter_path}", json={"data": data}, headers={"Authorization": f"Bearer {login()}"}, timeout=30)  # timeout: never let a dead API hang the cron run
     print(json.dumps(response.json(), indent=4))
-
-
-if __name__ == "__main__":
-    frequency = str(sys.argv[1])
-    push_update(frequency, login())
diff --git a/root b/root
index a73c0b1..056652d 100644
--- a/root
+++ b/root
@@ -5,7 +5,6 @@
 0	    2	    *	    *	    *	    run-parts /etc/periodic/daily
 0	    3	    *	    *	    6	    run-parts /etc/periodic/weekly
 0	    5	    1	    *	    *	    run-parts /etc/periodic/monthly
-0       *       *       *       *       python3 /app/main.py hourly
-0       1       *       *       *       python3 /app/main.py daily
-0       1       *       *       1       python3 /app/main.py weekly
-0       1       1       *       *       python3 /app/main.py monthly
\ No newline at end of file
+0       1       *       *       *       /app/run.sh daily
+0       1       *       *       1       /app/run.sh weekly
+0       1       1       *       *       /app/run.sh monthly
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..10fb590
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Usage: run.sh <frequency> - runs every job listed under that key in jobs.yaml.
+cd /app || exit 1
+yq ".$1 | .[]" jobs.yaml | tr -d '"' | while IFS=',' read -r job; do
+    [ -z "$job" ] || { echo "Processing element: $job"; python "jobs/$job.py"; }  # skip blanks: a frequency without jobs must not run "jobs/.py"
+done
\ No newline at end of file
-- 
GitLab