Commit a34b135e authored by smetaj

Merge branch 'qa-v0.4_stiven' of ssh://gitlab.cern.ch:7999/cloud-infrastructure/data-analytics into qa-v0.4_stiven
parents d91f0e95 3aeb752d
stages:
- build-base-image
- test-components
- build-qa-images
- build-images
- test-qa
- build-prod-images
- test-prod
@@ -49,7 +48,7 @@ build_tox_image:
- docker-images/tox/*
#------------------------------------
# Build image to be used in tox tests
# Build base image for the spark notebook based on swan image
build_sparknotebook_base_image:
stage: build-base-image
before_script:
@@ -64,162 +63,88 @@ build_sparknotebook_base_image:
changes:
- docker-images/sparknotebook/Dockerfile-swan-base
############################################################
##### BUILD QA IMAGES #####
############################################################
#-------------------------------------------------------------------------------------
# Build image that runs jupyter notebook with the data-analytics libraries installed
# The same image can be used interactively to start a jupyter notebook
job_build_jupyter_qa_image:
stage: build-qa-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/jupyter/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR
- export IMAGE_NAME=jupyter
- export IMAGE_TAG=$CI_COMMIT_BRANCH
<<: *template_build_image
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
#-----------------------------
# Promote image as prod one
job_build_jupyter_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/jupyter/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR
- export IMAGE_NAME=jupyter
- export IMAGE_TAG=$CI_COMMIT_TAG
<<: *template_build_image
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
#-------------------------------------------------------------------------------------
# Build image that contains docker compose to
# test pipelines in docker compose
# in privileged runner
job_build_compose_qa_image:
stage: build-qa-images
build_compose_image:
stage: build-base-image
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/compose/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/docker-images/compose
- export IMAGE_NAME=compose
- export IMAGE_TAG=$CI_COMMIT_BRANCH
- export IMAGE_TAG=latest
<<: *template_build_image
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
changes:
- docker-images/compose/*
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/'
changes:
- docker-images/compose/*
- if: '$CI_COMMIT_TAG =~ /^v.*$/'
############################################################
##### BUILD QA IMAGES #####
############################################################
#-----------------------------
# Promote image as prod one
job_build_compose_prod_image:
stage: build-prod-images
#-------------------------------------------------------------------------------------
# Build image that runs jupyter notebook with the data-analytics libraries installed
# The same image can be used interactively to start a jupyter notebook
build_jupyter_image:
stage: build-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/compose/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/docker-images/compose
- export IMAGE_NAME=compose
- export IMAGE_TAG=$CI_COMMIT_TAG
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/jupyter/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR
- export IMAGE_NAME=jupyter
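# ${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH} resolves to the release tag on tagged (v*) pipelines and falls back to the branch name on qa branches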
- export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH}
<<: *template_build_image
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
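# a single job now covers both QA and prod builds: it runs on qa* branches and on v* release tags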
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
#-------------------------------------------------------------------------------------
# Build fluentd image with plugins
job_build_fluentd_qa_image:
stage: build-qa-images
build_fluentd_image:
stage: build-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/fluentd/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/docker-images/fluentd
- export IMAGE_NAME=fluentd
- export IMAGE_TAG=$CI_COMMIT_BRANCH
- export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH}
<<: *template_build_image
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
#-----------------------------
# Promote image as prod one
job_build_fluentd_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/fluentd/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/docker-images/fluentd
- export IMAGE_NAME=fluentd
- export IMAGE_TAG=$CI_COMMIT_TAG
<<: *template_build_image
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
#-------------------------------------------------------------------------------------
# Build image that runs swan spark notebook with the data-analytics libraries installed
# The same image can be used interactively to query spark
job_build_spark_qa_image:
stage: build-qa-images
build_spark_image:
stage: build-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/sparknotebook/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/
- export IMAGE_NAME=sparknotebook
- export IMAGE_TAG=$CI_COMMIT_BRANCH
<<: *template_build_image
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
#-----------------------------
# Promote image as prod one
job_build_spark_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/sparknotebook/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/
- export IMAGE_NAME=sparknotebook
- export IMAGE_TAG=$CI_COMMIT_TAG
- export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH}
<<: *template_build_image
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
#-------------------------------------------------------------------------------------
# Build the airflow image
job_build_airflow_qa_image:
stage: build-qa-images
build_airflow_image:
stage: build-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/docker-airflow/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/docker-images/docker-airflow/
- export IMAGE_NAME=docker-airflow
- export IMAGE_TAG=$CI_COMMIT_BRANCH
- export IMAGE_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH}
<<: *template_build_image
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
#-----------------------------
# Promote image as prod one
job_build_airflow_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/docker-airflow/Dockerfile
- export CONTEXT=$CI_PROJECT_DIR/docker-images/docker-airflow
- export IMAGE_NAME=docker-airflow
- export IMAGE_TAG=$CI_COMMIT_TAG
<<: *template_build_image
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
############################################################
##### TEST COMPONENTS #####
############################################################
pep8:
.pep8:
stage: test-components
image: $CI_REGISTRY_IMAGE/tox:latest
except:
@@ -228,7 +153,7 @@ pep8:
- cd $CI_PROJECT_DIR
- scl enable rh-python36 'tox -epep8'
coverage:
.coverage:
stage: test-components
image: $CI_REGISTRY_IMAGE/tox:latest
except:
@@ -247,7 +172,7 @@ coverage:
# ----------------------------------------------------
# Test data extraction from grafana using grafana_etl
job_qa_test_grafana_etl: &template_test_grafana_etl
qa_test_grafana_etl: &template_test_grafana_etl
stage: test-qa
image:
name: $CI_REGISTRY_IMAGE/jupyter:${CI_COMMIT_BRANCH}
@@ -267,7 +192,9 @@ job_qa_test_grafana_etl: &template_test_grafana_etl
expire_in: 1 week
when: always
job_prod_test_grafana_etl:
# Need to duplicate the test because the image name cannot be templated as
# $CI_REGISTRY_IMAGE/jupyter:${CI_COMMIT_TAG:-${CI_COMMIT_BRANCH}}
prod_test_grafana_etl:
<<: *template_test_grafana_etl
stage: test-prod
image:
@@ -279,46 +206,38 @@ job_prod_test_grafana_etl:
# -----------------------------------------------
# Test data extraction from spark using spark_etl
job_qa_spark_etl: &template_test_spark_etl
spark_etl: &template_test_spark_etl
stage: test-qa
tags:
- data-analytics-spark-ci # for private runner
image:
name: $CI_REGISTRY_IMAGE/compose:v0.1
entrypoint: [""]
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
script:
- ${CI_PROJECT_DIR}/tests/spark_etl/ci_test_script.sh
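# ci_test_script.sh is sourced so that the start_docker_compose / stop_docker_compose helpers it defines can be called as separate steps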
- . ${CI_PROJECT_DIR}/tests/spark_etl/ci_test_script.sh
- start_docker_compose
after_script:
- cd $CI_PROJECT_DIR/tests/spark_etl
- docker-compose down --remove-orphans --volumes
- . ${CI_PROJECT_DIR}/tests/spark_etl/ci_test_script.sh
- stop_docker_compose
artifacts:
paths:
- $CI_PROJECT_DIR/tests/spark_etl/*
expire_in: 1 week
when: always
job_prod_test_spark_etl:
<<: *template_test_spark_etl
stage: test-prod
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
# -----------------------------------------------------
# Test fluentd pipeline to push data into Elasticsearch
job_qa_pipeline_anomaly_to_ES: &template_pipeline_anomaly_to_ES
pipeline_anomaly_to_ES: &template_pipeline_anomaly_to_ES
stage: test-qa
tags:
- data-analytics-spark-ci # for private runner
image:
name: $CI_REGISTRY_IMAGE/compose:v0.1
entrypoint: [""]
only:
variables:
- $CI_COMMIT_BRANCH =~ /^qa.*$/
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
before_script:
- . $CI_PROJECT_DIR/tests/anomaly_to_ES/ci_run_script.sh
- start_docker_compose
@@ -335,12 +254,25 @@ job_qa_pipeline_anomaly_to_ES: &template_pipeline_anomaly_to_ES
expire_in: 1 week
when: always
job_prod_pipeline_anomaly_to_ES:
<<: *template_pipeline_anomaly_to_ES
stage: test-prod
# -----------------------------------------------
# Test adcern
qa_adcern:
stage: test-qa
tags:
- data-analytics-spark-ci # for private runner
image:
name: $CI_REGISTRY_IMAGE/compose:${CI_COMMIT_TAG}
name: $CI_REGISTRY_IMAGE/compose:v0.1
entrypoint: [""]
only:
variables:
- $CI_COMMIT_TAG =~ /^v.*$/
rules:
- if: '$CI_COMMIT_BRANCH =~ /^qa.*$/ || $CI_COMMIT_TAG =~ /^v.*$/'
script:
- . ${CI_PROJECT_DIR}/tests/adcern/integration/ci_test_script.sh
- start_docker_compose
after_script:
- . ${CI_PROJECT_DIR}/tests/adcern/integration/ci_test_script.sh
- stop_docker_compose
artifacts:
paths:
- $CI_PROJECT_DIR/tests/adcern/integration/*
expire_in: 1 week
when: always
@@ -321,8 +321,11 @@ def save_scores_local_parquet(algorithm_name,
# CONSOLE APPLICATION
# see https://github.com/pallets/click/issues/1123
def normalize_names(name):
return name.replace("_", "-")
@click.group()
@click.group(context_settings={"token_normalize_func": normalize_names})
def cli():
print("Welcome in the Mining and Detection CLI.")
@@ -330,7 +333,7 @@ def cli():
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining whcih normaliz to check.""")
def normalizationpresence(resource_file):
def normalization_presence(resource_file):
"""Check for the presence of normalization coefficients."""
# PREPARE SPARK
sc, spark, conf = spark_preparation()
@@ -353,7 +356,7 @@ def normalizationpresence(resource_file):
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining which coeff to compute.""")
def computenormalization(resource_file):
def compute_normalization(resource_file):
"""Compute normalization coefficients."""
# PREPARE SPARK
sc, spark, conf = spark_preparation()
@@ -380,7 +383,7 @@ def computenormalization(resource_file):
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining the data we need.""")
def datapresence(resource_file):
def data_presence(resource_file):
# PREPARE SPARK
# sc, spark, conf = spark_preparation()
# READ RESOURCE FILE
@@ -402,7 +405,7 @@ def datapresence(resource_file):
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining what to download.""")
def downloaddata(resource_file):
def download_data(resource_file):
# PREPARE SPARK
sc, spark, conf = spark_preparation()
# READ RESOURCE FILE
@@ -422,7 +425,7 @@ def downloaddata(resource_file):
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining what to cache.""")
def cachelocally(resource_file):
def cache_locally(resource_file):
"""Cache your data locally (aka move them from spark to local disk)."""
# PREPARE SPARK
sc, spark, conf = spark_preparation()
@@ -724,9 +727,17 @@ def analysis(module_name, class_name, alias_name, hyperparameters,
# PUBLISH in FLUENTD
# create ordered score
# PercScore has no attribute std_scores
if class_name == "PercScore":
standardized_values_tmp = False
else:
standardized_values_tmp = True
ordered_hosts, ordered_scores = zip(*critical_individuals)
algo.publish_top_k(ts_second_window_end=ts,
top_k=PUBLISH_PER_WINDOW,
standardized_values=standardized_values_tmp,
validity=True)
print("Save to Parquet...")
@@ -743,6 +754,8 @@ def analysis(module_name, class_name, alias_name, hyperparameters,
print("Iteration on windows finished")
pass
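# main() wrapper: presumably exposes an importable entry point (e.g. for a console_scripts hook); it simply delegates to cli().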
def main():
cli()
if __name__ == '__main__':
cli()
main()
@@ -25,8 +25,11 @@ MY_END_WEEK = datetime(year=2020, month=8, day=16)
# CONSOLE APPLICATION
# see https://github.com/pallets/click/issues/1123
def normalize_names(name):
return name.replace("_", "-")
@click.group()
@click.group(context_settings={"token_normalize_func": normalize_names})
def cli():
print("Welcome in the visualization CLI.")
@@ -36,7 +39,7 @@ def cli():
help='''path where to look for parquet files to merge.''')
@click.option('--output_folder', default="",
help='''path where to save combined_scores.parquet.''')
def mergedb(input_folder, output_folder):
def merge_db(input_folder, output_folder):
"""Merge Parquet DB in the folder."""
# get db files
onlyfiles = [f for f in os.listdir(input_folder)
@@ -69,7 +72,7 @@ def mergedb(input_folder, output_folder):
help='''path where to save the png plot file.''')
@click.option('--fixed_scale', default="True",
help='''if you want to bring your scores onto a fixed scale.''')
def createcorrscore(hostgroup, input_folder, output_folder, fixed_scale):
def create_corr_score(hostgroup, input_folder, output_folder, fixed_scale):
"""Plot the correlations among scores."""
print('Creating correlation scores!')
# READ ALL SCORES
@@ -174,7 +177,7 @@ def createcorrscore(hostgroup, input_folder, output_folder, fixed_scale):
help='''full name of the hostgroup to inspect.''')
@click.option('--input_folder', default="",
help='''path where to look for labels.''')
def labelpresence(hostgroup, input_folder):
def label_presence(hostgroup, input_folder):
"""Check if labels for this hostgroup are already present."""
name_hostgroup = hostgroup.split("/")[-1]
in_csv = input_folder + "/" + name_hostgroup + "_labels.csv"
@@ -190,7 +193,7 @@ def labelpresence(hostgroup, input_folder):
help='''full name of the hostgroup to extract.''')
@click.option('--output_folder', default="",
help='''path where to save the csv file.''')
def extractannotation(grafana_token, hostgroup, output_folder):
def extract_annotation(grafana_token, hostgroup, output_folder):
"""Extract the annotation from grafana and save a csv file."""
# API call to grafana
jres = \
@@ -219,7 +222,7 @@ def extractannotation(grafana_token, hostgroup, output_folder):
help='''json with info about the granularity.''')
@click.option('--output_folder', default="",
help='''path where to save the csv file.''')
def createlabels(hostgroup, input_folder, analysis_file,
def create_labels(hostgroup, input_folder, analysis_file,
config_file, output_folder):
"""Convert interval annotations into window labels.
@@ -262,7 +265,7 @@ def createlabels(hostgroup, input_folder, analysis_file,
help='''path where to find labels csv file.''')
@click.option('--output_folder', default="",
help='''path where to save the plotting image.''')
def visualizelabels(hostgroup, input_folder, output_folder):
def visualize_labels(hostgroup, input_folder, output_folder):
"""Visualize label."""
# read the labels
name_hostgroup = hostgroup.split("/")[-1]
@@ -294,7 +297,7 @@ def visualizelabels(hostgroup, input_folder, output_folder):
help='''end of evaluation period.
Note the unusual presence of underscore "_"
e.g. "2020-03-01_00:00:00"''')
def scorebenchmark(folder_scores, hostgroup,
def score_benchmark(folder_scores, hostgroup,
labels_folder, algo_name, family,
start, end):
"""Score the algorithm anomaly scores agains the labels."""
@@ -451,7 +454,7 @@ def scorebenchmark(folder_scores, hostgroup,
help='''path where to find week_metrics.db file.''')
@click.option('--output_folder', default="",
help='''path where to save the plotting image.''')
def visualizeauc(hostgroup, input_folder, output_folder):
def visualize_auc(hostgroup, input_folder, output_folder):
"""Visualize the AUC results form the selected algo."""
# read the database of scores in a pandas DF
conn = sqlite3.connect(input_folder + "/week_metrics.db")
@@ -540,13 +543,15 @@ def visualizeauc(hostgroup, input_folder, output_folder):
@cli.command()
@click.option('--input_folder', default="",
help='''path where to remove week_metrics.db file.''')
def removeolddatabase(input_folder):
def remove_old_database(input_folder):
"""Remove old database for weekly AUC results."""
try:
os.remove(input_folder + "/week_metrics.db")
except Exception as e:
print(e)
def main():
cli()
if __name__ == '__main__':
cli()
main()
# Control Room
# Anomaly Detection Pipeline based on Airflow
Control room contains the procedures to deploy the Airflow setup and automate the Anomaly Detection task.
The Anomaly Detection task can also be run automatically.
To do that we rely on [Apache Airflow](https://airflow.apache.org/).
To provide an easy-to-use environment, we encapsulated all the required building blocks (Airflow included) in Docker containers that can be run with Docker Compose.
The Airflow Docker Compose setup is heavily based on the examples found at https://github.com/puckel/docker-airflow
This area is called `Control room` and contains the procedures to deploy the Airflow setup and automate the Anomaly Detection task.
The folder includes
@@ -8,11 +14,14 @@ The folder includes
To be run once when a new machine needs to be configured
1. Docker-compose configuration ([link](airflow-compose))<br>
To set up the Airflow system
1. Configuration files ([link](config_file))<br>
Configuration files used for .... #FIXME
1. Docker-swarm configuration - w.i.p. ([link](docker-swarm))<br>
Steps:
## Getting started
We suggest running on a dedicated virtual machine (VM) that can be provisioned on the [OpenStack CERN Platform](https://openstack.cern.ch/). For initial tests we suggest starting with a flavor providing at least 7 GB of RAM.
1. Log in to the VM (**tested on CentOS 7**) with the following port forwarding:
```
@@ -46,43 +55,39 @@ curl https://gitlab.cern.ch/cloud-infrastructure/data-analytics/-/raw/$branch/co
install_all
```
Then follow the instructions printed by the install_AD.sh script to finalise the setup
3. Start the Docker Compose setup of the Airflow-based Anomaly Detection System with the following command:
```
/opt/control_ad_system/airflow-compose/start_ad_system.sh
sudo -u airflow /opt/ad_system/control_ad_system/start_ad_system.sh
```
NB: the script `/opt/ad_system/control_ad_system/start_ad_system.sh` can also be sourced, to easily delete the running docker-compose setup
**Congratulations!** You have just completed the full installation of your Anomaly Detection System.
## Getting started with your first Anomaly Detection DAG
### Getting started with Anomaly Detection DAG
Now that Airflow is up and running we can test the Anomaly Detection System and
its algorithms on a demo scenario.
Follow these steps:
1. Open the File Browser http://localhost:5003/ and log in (username = admin, pass = admin). Navigate to the folder **/airflow-compose/dags** and open the file
**config_variables.py**. There you have to uncomment the deploy section:
```
# DEPLOY
SYSTEM_FOLDER = "..."
DATALAKE_FOLDER = "..."
TMP_CONFIG = "..."
IMAGE_NAME = "..."
```
and comment out the development section:
```
# DEVELOPEMENT
# SYSTEM_FOLDER = "..."
# DATALAKE_FOLDER = "..."
# TMP_CONFIG = "..."
# IMAGE_NAME = "..."
```
1. Open the Airflow UI: http://localhost:8080/
1. Search for the dag named **dag_ad_demo** and click on its name.
1. Click on the *graph view* tab to see the interconnection between different tasks
1. Click on the **on/off switch** nex to the header *DAG: dag_ad_demo*.
1. Click on the **on/off switch** next to the header *DAG: dag_ad_demo*.
**Congratulations!** You have just started your first Anomaly Detection pipeline. Check its successful termination via the *graph view*:
when all the boxes are dark green the pipeline is complete.
## Additional Documentation
The Anomaly Detection System driven by Airflow can be started not only with Docker Compose (our implemented choice) but also with
Docker Swarm (which requires a Swarm cluster to already be up) or using Kubernetes. These two methods are still work in progress.
**Congratulation!** You just started your first Anomaly Detection pipeline. Check the its successful termination via the *graph view*, when all the boxes are dark green the pipeline is completed.
- In the case of Docker Swarm, to continue the work look at the scripts in the folder [docker-swarm](./docker-swarm)
- For Kubernetes, documentation can be found at
- https://kubernetes.io/blog/2018/06/28/airflow-on-kubernetes-part-1-a-different-kind-of-operator/
- https://airflow.apache.org/docs/stable/kubernetes.html
> **_NOTE:_** The file browser is used to create new Airflow DAGs (Directed Acyclic Graphs) and to modify the configuration files. Access it at http://localhost:5003/ with username = admin, pass = admin.
# Anomaly Detection System driven by Airflow
**NB**:
In these examples there are dummy passwords, in the secret.sh file
those passwords are here only as example for a simple local test.
!!! Do not commit real production passwords !!!
In these examples there are dummy passwords stored in the secret.sh file.
Those passwords are there only as an example for a simple local test.
**!!! Do not commit real production passwords !!!**