diff --git a/.gitignore b/.gitignore index c5ab79bacf3e6890493e88ee6428a83ab62c948e..3df3924669e1c92b5c5ca13dd0b99636614a3ec0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ venv __pycache__/ -build.sh \ No newline at end of file +build.sh +docker-env.txt \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index d89925115320c1f918d867ac55e2409f84f2b1d9..cc958fad660e35a0a02d51e1446d11d6266e547d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,16 @@ FROM python:3.10-alpine WORKDIR /app -COPY requirements.txt main.py ./ + +COPY requirements.txt ./ RUN pip install -r requirements.txt -RUN apk update && apk add openrc +COPY ./jobs ./jobs +COPY jobs.yaml run.sh ./ +RUN chmod +x /app/run.sh + +RUN apk update && apk add openrc bash yq RUN rc-service /usr/sbin/crond start && rc-update add /usr/sbin/crond -RUN ls /etc/crontabs/root COPY root /etc/crontabs/root CMD ["/usr/sbin/crond", "-f"] \ No newline at end of file diff --git a/README.md b/README.md index 9c6af2d5a17a6bda77a608828970407e8ded78c8..b5d78aff03c8de7ff4e9b7983a479bd656fbc88a 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,26 @@ A scheduled updater that retrieves data from sources and sends the data over to This updater will run scripts daily, weekly and monthly etc. based on cron jobs. +## What is the idea? +The design idea was to move the responsibility for storing the scripts from the data-management-api to GitLab itself. When we designed the concept of running the scripts using `subprocess`, we were not sure about the dependencies we would need for the jobs and how they would affect this process. This will prevent any issue with dependencies not being able to be imported. + +Given the possible security concerns, this proposal moves the scripts to GitLab. This also has the added benefit of everything GitLab has to offer, such as the history of the scripts and ways to implement "approval" of scripts. + +## How to make a new job? +1. Clone the repo +2. 
Create a new branch with a logical name, like the parameter you're updating, and check it out +3. Copy the `example.py` file in jobs and edit it to suit your needs. Make sure to edit the path argument passed to the `update()` function to point to the parameter you want to update. +4. Edit the `jobs.yaml` and add the name of your file (without the `.py` extension) to whatever frequency you need. +5. Push your changes to the repo in the new branch +6. Create a merge request + + +## Process explained +1. Container gets built and installs everything in `requirements.txt` +2. Container is run and cron is spun up to trigger `run.sh` with the specified frequency. +3. `run.sh` loops over the array of filenames provided in `jobs.yaml` and runs `python <file>` on them +4. Each file ends up calling `update()`, which in turn logs in to the API using Azure AD, and sends over the data that should be updated on the given parameter. + ## Installation ```bash diff --git a/jobs.yaml b/jobs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9bb530872f88b16b612b0e7de7e915fd1663cf5 --- /dev/null +++ b/jobs.yaml @@ -0,0 +1,6 @@ +hourly: + - example +daily: + - example +weekly: + - example \ No newline at end of file diff --git a/jobs/example.py b/jobs/example.py new file mode 100644 index 0000000000000000000000000000000000000000..087a911d6e96020cdbd4a965345754c270e78bf5 --- /dev/null +++ b/jobs/example.py @@ -0,0 +1,21 @@ +from utils import update +data = "bar!" 
+ +# In this file, put the script to run job + +""" + The path to the parameter that you want to update is to be indicated + in the following format: + "path.to.parameter" + + This edits: + { + path: { + to: { + parameter: <------------ + } + } + } +""" + +update("foo", data) diff --git a/main.py b/jobs/utils.py similarity index 75% rename from main.py rename to jobs/utils.py index d5f1040416992179e633dcd4d696e4852486c43f..f0dda38ca9fb57e4bfa616519e722933be679786 100644 --- a/main.py +++ b/jobs/utils.py @@ -1,4 +1,3 @@ -import sys import os import requests import json @@ -22,11 +21,6 @@ def login(): return response.json()["access_token"] -def push_update(frequency, token): - response = requests.post(f"{host}/update/{frequency}", headers={"Authorization": f"Bearer {token}"}) +def update(parameter_path, data): + response = requests.post(f"{host}/update/{parameter_path}", json={"data": data}, headers={"Authorization": f"Bearer {login()}"}) print(json.dumps(response.json(), indent=4)) - - -if __name__ == "__main__": - frequency = str(sys.argv[1]) - push_update(frequency, login()) diff --git a/root b/root index a73c0b18d55226f704132839f476d442efc25a5c..056652dabd5aea00bf3a97f477b284c6e72c3e18 100644 --- a/root +++ b/root @@ -5,7 +5,6 @@ 0 2 * * * run-parts /etc/periodic/daily 0 3 * * 6 run-parts /etc/periodic/weekly 0 5 1 * * run-parts /etc/periodic/monthly -0 * * * * python3 /app/main.py hourly -0 1 * * * python3 /app/main.py daily -0 1 * * 1 python3 /app/main.py weekly -0 1 1 * * python3 /app/main.py monthly \ No newline at end of file +0 1 * * * /app/run.sh daily +0 1 * * 1 /app/run.sh weekly +0 1 1 * * /app/run.sh monthly \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..10fb5909c0f0eec339faa1a25f88e8968997d1c2 --- /dev/null +++ b/run.sh @@ -0,0 +1,6 @@ +cd /app +jobs=$(yq ".$1 | .[]" jobs.yaml | tr -d '"') +echo "$jobs" | while IFS=',' read -r job; do + echo "Processing element: $job" + 
python "jobs/$job.py" +done \ No newline at end of file