Commit 79856c94 authored by Domenico Giordano

Continue cleaning of the installation procedures

parent dd3915ae
stages:
- build-base-image
- test-components
- build-qa-images
- test-qa
......@@ -85,7 +85,7 @@ job_build_jupyter_qa_image:
#-----------------------------
# Promote image as prod one
job_build_jupyter_prod_image:
build_jupyter_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/jupyter/Dockerfile
......@@ -101,7 +101,7 @@ job_build_jupyter_prod_image:
# Build image that contains docker compose to
# test pipelines in docker compose
# in privileged runner
job_build_compose_qa_image:
build_compose_qa_image:
stage: build-qa-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/compose/Dockerfile
......@@ -117,7 +117,7 @@ job_build_compose_qa_image:
#-----------------------------
# Promote image as prod one
job_build_compose_prod_image:
build_compose_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/compose/Dockerfile
......@@ -131,7 +131,7 @@ job_build_compose_prod_image:
#-------------------------------------------------------------------------------------
# Build fluentd image with plugins
job_build_fluentd_qa_image:
build_fluentd_qa_image:
stage: build-qa-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/fluentd/Dockerfile
......@@ -145,7 +145,7 @@ job_build_fluentd_qa_image:
#-----------------------------
# Promote image as prod one
job_build_fluentd_prod_image:
build_fluentd_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/fluentd/Dockerfile
......@@ -160,7 +160,7 @@ job_build_fluentd_prod_image:
#-------------------------------------------------------------------------------------
# Build image that runs swan spark notebook with the data-analytics libraries installed
# The same image can be used interactively to query spark
job_build_spark_qa_image:
build_spark_qa_image:
stage: build-qa-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/sparknotebook/Dockerfile
......@@ -174,7 +174,7 @@ job_build_spark_qa_image:
#-----------------------------
# Promote image as prod one
job_build_spark_prod_image:
build_spark_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/sparknotebook/Dockerfile
......@@ -188,7 +188,7 @@ job_build_spark_prod_image:
#-------------------------------------------------------------------------------------
# Build image that runs airflow
job_build_airflow_qa_image:
build_airflow_qa_image:
stage: build-qa-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/docker-airflow/Dockerfile
......@@ -202,7 +202,7 @@ job_build_airflow_qa_image:
#-----------------------------
# Promote image as prod one
job_build_airflow_prod_image:
build_airflow_prod_image:
stage: build-prod-images
before_script:
- export DOCKERFILE=$CI_PROJECT_DIR/docker-images/docker-airflow/Dockerfile
......@@ -219,7 +219,7 @@ job_build_airflow_prod_image:
##### TEST COMPONENTS #####
############################################################
pep8:
.pep8:
stage: test-components
image: $CI_REGISTRY_IMAGE/tox:latest
except:
......@@ -228,7 +228,7 @@ pep8:
- cd $CI_PROJECT_DIR
- scl enable rh-python36 'tox -epep8'
coverage:
.coverage:
stage: test-components
image: $CI_REGISTRY_IMAGE/tox:latest
except:
......@@ -247,7 +247,7 @@ coverage:
# ----------------------------------------------------
# Test data extraction from grafana using grafana_etl
job_qa_test_grafana_etl: &template_test_grafana_etl
qa_test_grafana_etl: &template_test_grafana_etl
stage: test-qa
image:
name: $CI_REGISTRY_IMAGE/jupyter:${CI_COMMIT_BRANCH}
......@@ -267,7 +267,7 @@ job_qa_test_grafana_etl: &template_test_grafana_etl
expire_in: 1 week
when: always
job_prod_test_grafana_etl:
prod_test_grafana_etl:
<<: *template_test_grafana_etl
stage: test-prod
image:
......@@ -279,7 +279,7 @@ job_prod_test_grafana_etl:
# -----------------------------------------------
# Test data extraction from spark using spark_etl
job_qa_spark_etl: &template_test_spark_etl
qa_spark_etl: &template_test_spark_etl
stage: test-qa
tags:
- data-analytics-spark-ci # for private runner
......@@ -300,7 +300,7 @@ job_qa_spark_etl: &template_test_spark_etl
expire_in: 1 week
when: always
job_prod_test_spark_etl:
prod_test_spark_etl:
<<: *template_test_spark_etl
stage: test-prod
only:
......@@ -309,7 +309,7 @@ job_prod_test_spark_etl:
# -----------------------------------------------------
# Test fluentd pipeline to push data into Elasticsearch
job_qa_pipeline_anomaly_to_ES: &template_pipeline_anomaly_to_ES
qa_pipeline_anomaly_to_ES: &template_pipeline_anomaly_to_ES
stage: test-qa
tags:
- data-analytics-spark-ci # for private runner
......@@ -335,7 +335,7 @@ job_qa_pipeline_anomaly_to_ES: &template_pipeline_anomaly_to_ES
expire_in: 1 week
when: always
job_prod_pipeline_anomaly_to_ES:
prod_pipeline_anomaly_to_ES:
<<: *template_pipeline_anomaly_to_ES
stage: test-prod
image:
......
......@@ -48,9 +48,11 @@ install_all
3. Start the docker compose of the Airflow-based Anomaly Detection System with the following command:
```
/opt/control_ad_system/airflow-compose/start_ad_system.sh
sudo -u airflow /opt/ad_system/control_ad_system/start_ad_system.sh
```
NB: the script `/opt/ad_system/control_ad_system/start_ad_system.sh` can also be sourced, to easily tear down the running docker-compose setup (see the sketch below).
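For example, a minimal tear-down sketch, assuming the helper functions defined in this script (`get_secrets`, `stop_compose_AD`):
```
# sourcing only defines the helper functions, it does not start the compose
source /opt/ad_system/control_ad_system/start_ad_system.sh
# load the secrets referenced by the compose file (optional, if not already in the environment)
get_secrets
# tear down the running docker-compose setup
stop_compose_AD
```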
**Congratulations!** You have just completed the full installation of your Anomaly Detection System.
......
......@@ -110,6 +110,7 @@ services:
- REDIS_PASSWORD=${REDIS_PASSWORD}
privileged: true
volumes:
- /tmp:/tmp
- /eos:/eos
- /opt/:/opt/
- ${DAG_PATH}:/usr/local/airflow/dags
......
#!/bin/bash
# This script drives the start of docker-compose services
# for the Anomaly Detection System based on Airflow
SOURCE_SCRIPT=$(readlink -f $0)
export SOURCE_DIR=$(readlink -f $(dirname $0))
echo "SOURCE_DIR $SOURCE_DIR"
export AD_SOURCE_DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "AD_SOURCE_DIR $AD_SOURCE_DIR"
# Example to start docker compose
function get_secrets() {
. ${AD_SOURCE_DIR}/secret.sh
# LOCAL FILE WITH KRB PASSWORD
LOCAL_FILE_KRB_AUTH=/opt/ad_system/etc/local_service_user_password.sh
[ -e ${LOCAL_FILE_KRB_AUTH} ] && . ${LOCAL_FILE_KRB_AUTH}
}
# this should be run from an instance that has docker-compose available,
# for instance from a container
#
function stop_compose_AD() {
echo -e "\nStop previous docker-compose...\n"
docker-compose -f ${AD_SOURCE_DIR}/docker-compose.yml down --remove-orphans # --volumes
docker-compose -f ${AD_SOURCE_DIR}/docker-compose.yml rm
# get superuser privileges to run iptables and set up eos
echo -e "\nKill processes still using cvmfs\n"
[ `pgrep -f cvmfs -c` -gt 0 ] && kill -9 $(pgrep -f cvmfs)
}
. ${SOURCE_DIR}/secret.sh
function start_compose_AD(){
# LOCAL FILE WITH KRB PASSWORD
LOCAL_FILE_KRB_AUTH=/opt/ad_system/etc/local_service_user_password.sh
[ -e ${LOCAL_FILE_KRB_AUTH} ] && . ${LOCAL_FILE_KRB_AUTH}
stop_compose_AD
get_secrets
echo -e "\nStop previous docker-compose...\n"
docker-compose -f ${SOURCE_DIR}/docker-compose.yml down --remove-orphans # --volumes
docker-compose -f ${SOURCE_DIR}/docker-compose.yml rm
echo -e "\nRunning kinit...\n"
(echo $KRB_USER_PASSW | kinit $KRB_USER@CERN.CH) || fail 'kinit'
echo -e "\nKill processes still using cvmfs\n"
[ `pgrep -f cvmfs -c` -gt 0 ] && kill -9 $(pgrep -f cvmfs)
echo -e "\nTest EOS access...\n"
ls /eos/project/i/it-cloud-data-analytics/
echo -e "\nStart new docker-compose...\n"
docker-compose -f ${AD_SOURCE_DIR}/docker-compose.yml up -d --remove-orphans --renew-anon-volumes # --abort-on-container-exit # --force-recreate
}
echo -e "\nRunning kinit...\n"
(echo $KRB_USER_PASSW | kinit $KRB_USER@CERN.CH) || fail 'kinit'
if [[ $0 != $BASH_SOURCE ]];
then
echo "Script is being sourced"
else
echo "Script is being run"
start_compose_AD
fi
echo -e "\nTest EOS access...\n"
ls /eos/project/i/it-cloud-data-analytics/
echo -e "\nStart new docker-compose...\n"
docker-compose -f ${SOURCE_DIR}/docker-compose.yml up -d --remove-orphans --renew-anon-volumes # --abort-on-container-exit # --force-recreate
......@@ -197,4 +197,4 @@ selected_plugins:
# type: ps_state
# type_instance: blocked
# plugin_name: processes
...
\ No newline at end of file
...
......@@ -11,22 +11,23 @@ function uninstall(){
function install_centos7() {
# INSTALL WGET
yum -y install wget
yum -y install wget \
kstart screen emacs
# INSTALL DOCKER
yum-config-manager \
--add-repo \
https://download.docker.com/linux/centos/docker-ce.repo
yum -y install docker-ce docker-ce-cli containerd.io
install_eos
}
function install_eos(){
# INSTALL EOS
# https://cern.service-now.com/service-portal?id=kb_article&n=KB0003846
# For CERN CentOS 7 and CentOS8 desktops, please be sure that the EPEL repository is enabled and then use (as root):
echo -e "install EOS: this can require few minutes"
locmap --enable eosclient
locmap --configure eosclient
......@@ -39,24 +40,83 @@ function install_common(){
chmod +x /usr/local/bin/docker-compose
[ ! -e /usr/bin/docker-compose ] && ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
docker-compose --version
}
function install_ad(){
# STOP DOCKER
systemctl stop docker
# Add user airflow
useradd airflow
usermod -aG docker airflow
# CHANGE SYSTEM GID / UID of DOCKER and AIRFLOW to match the
# internal docker airflow configuration
groupmod -g 1001 airflow
groupmod -g 1000 docker
# update the permission on the main docker file launcher
#chmod 660 /var/run/docker.sock
#chgrp docker /var/run/docker.sock
# add user airflow in the group docker
usermod -g docker airflow
# START DOCKER
systemctl start docker
BASE_AD_DIR=/opt/ad_system
# PREPARE MAIN FOLDER
# main folder with files of the system
[ ! -e /opt/ad_system/ ] && mkdir -p /opt/ad_system/
[ ! -e ${BASE_AD_DIR} ] && mkdir -p ${BASE_AD_DIR}
# go to the main folder
# download the repository file FROM QA
# and create a folder
cd /opt/ad_system/
cd ${BASE_AD_DIR}
wget https://gitlab.cern.ch/cloud-infrastructure/data-analytics/-/archive/$branch/data-analytics-$branch.tar.gz -O - | tar -xz
chown -R airflow ${BASE_AD_DIR}
# create the symlink to give a standard directory for the dags
ln -s /opt/ad_system/data-analytics-$branch/control_room/ /opt/control_ad_system
CONTROL_AD_DIR=${BASE_AD_DIR}/control_ad_system
sudo -u airflow ln -s data-analytics-$branch/control_room/airflow-compose ${CONTROL_AD_DIR}
# PREPARE FOLDER STRUCTURE
# folders to store data locally
mkdir -p /tmp/vm-datalake/project_cache/
# to store temporary configurations
mkdir /tmp/tmp-configurations/
# make airflow owner
chown -R airflow /tmp/tmp-configurations/ /tmp/vm-datalake
set +x
# Now install credentials
KRB_CREDENTIAL_FILE=${BASE_AD_DIR}/etc/local_service_user_password.sh
mkdir -p $(readlink -f $(dirname ${KRB_CREDENTIAL_FILE}))
touch $KRB_CREDENTIAL_FILE
chown airflow $KRB_CREDENTIAL_FILE
echo -e "\nKerberos credentials need to be installed in $KRB_CREDENTIAL_FILE\n to be used by the Spark containers.\n Do you want to do it now?"
read -p "[y]/n " -s answer < /dev/tty #needed to work with "| sh"
if [ "$answer" == "y" ]; then
echo -e "\n"
read -p "Kerberos user: " -s KRB_USER < /dev/tty #needed to work with "| sh"
echo -e "\n"
read -p "Kerberos password: " -s KRB_USER_PASSW < /dev/tty
# create a file in the root with configuration info
echo -e "export KRB_USER='$KRB_USER'\nexport KRB_USER_PASSW='$KRB_USER_PASSW'\n" > $KRB_CREDENTIAL_FILE
chmod 500 $KRB_CREDENTIAL_FILE
else
echo -e "\nPlease remember to include in $KRB_CREDENTIAL_FILE the following lines
export KRB_USER='some_user'
export KRB_USER_PASSW='some_password'\n"
fi
echo " "
}
function set_firewall(){
# Set firewall rules to close some ports
# and open the port to communicate with Spark Cluster
if [ `iptables-save | grep DOCKER-USER | grep -c '\-\-dport 5003 -j DROP'` -eq 0 ];
......@@ -112,60 +172,6 @@ function set_firewall(){
}
function configure(){
# STOP DOCKER
systemctl stop docker
# Add user airflow
useradd airflow
usermod -aG docker airflow
# CHANGE SYSTEM GID / UID of DOCKER and AIRFLOW to match the
# internal docker airflow configuration
groupmod -g 1001 airflow
groupmod -g 1000 docker
# update the permission on the main docker file launcher
chmod 660 /var/run/docker.sock
chgrp docker /var/run/docker.sock
# add user airflow in the group docker
usermod -g docker airflow
# START DOCKER
systemctl start docker
# PREPARE FOLDER STRUCTURE
# folders to store data locally
mkdir -p /tmp/vm-datalake/project_cache/
# to store temporary configurations
mkdir /tmp/tmp-configurations/
# make airflow owner
chown airflow /tmp/tmp-configurations/
set +x
# Now install credentials
KRB_CREDENTIAL_FILE=/opt/ad_system/etc/local_service_user_password.sh
mkdir -p $(readlink -f $(dirname ${KRB_CREDENTIAL_FILE}))
echo -e "\nKerberos credentials need to be installed in $KRB_CREDENTIAL_FILE\n to be used by the Spark containers.\n Do you want to do it now?"
read -p "[y]/n " -s answer < /dev/tty #needed to work with "| sh"
if [ "$answer" == "y" ]; then
echo -e "\n"
read -p "Kerberos user: " -s KRB_USER < /dev/tty #needed to work with "| sh"
echo -e "\n"
read -p "Kerberos password: " -s KRB_USER_PASSW < /dev/tty
# create a file in the root with configuration info
echo -e "export KRB_USER='$KRB_USER'\nexport KRB_USER_PASSW='$KRB_USER_PASSW'\n" > $KRB_CREDENTIAL_FILE
chmod 777 $KRB_CREDENTIAL_FILE
else
echo -e "\nPlease remember to include in $KRB_CREDENTIAL_FILE the following lines
export KRB_USER='some_user'
export KRB_USER_PASSW='some_password'\n"
fi
}
function install_all(){
set -x #to display commands to be executed
......@@ -178,13 +184,14 @@ function install_all(){
fi
install_common
configure
install_ad
set_firewall
echo -e "\nInstallation finished.\n"
echo -e "To start the docker-compose of Anomaly Detection System run"
echo -e "/opt/control_ad_system/airflow-compose/start_ad_system.sh"
echo -e "\n you may want to change dummy passwords in /opt/control_ad_system/airflow-compose/secret.sh"
echo -e "${CONTROL_AD_DIR}/start_ad_system.sh"
echo -e "\n you may want to change dummy passwords in ${CONTROL_AD_DIR}/secret.sh"
cd $HOME
}
#FIXME
Is this documentation still valid? If so, it needs to be updated to use the correct paths.
# Anomaly Detection Pipeline Airflow
The data-analytics model can also be run in an automated way. For that we rely on [Apache Airflow](https://airflow.apache.org/).
To provide an easy-to-use environment, we encapsulated all the required blocks (Airflow included) in Docker containers that can be run thanks to Docker Compose.
## Getting started
1. Create a virtual machine on the [OpenStack CERN Platform](https://openstack.cern.ch/) (e.g. named my-ad-machine); we suggest at least 7 GB of RAM. An optional command-line sketch is shown below.
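If you prefer the OpenStack command-line client over the web interface, a hedged sketch follows; the flavor, image and keypair names are placeholders to adapt to your OpenStack project:
```shell
# placeholders: pick a flavor with at least 7 GB of RAM, a CentOS image and your own keypair
matteo@MacBook openstack server create \
    --flavor m2.large \
    --image "CC7 - x86_64" \
    --key-name my-keypair \
    my-ad-machine
```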
1. Access the VM via the following command
(be sure to be inside the CERN network or to have gone through lxplus).
```shell
matteo@MacBook ssh -L 8080:localhost:8080 root@my-ad-machine
```
In this way you enter the my-ad-machine virtual machine as the root user, and you also set up port forwarding so you can view the Airflow web interface later.
Note that if you go through lxplus as a first step, you need to add "-L 8080:localhost:8080" to that connection as well in order to reach the Airflow web interface, as sketched below.
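For instance, a hedged sketch of the double port forwarding through lxplus (the lxplus username is a placeholder):
```shell
# hop 1: from your laptop to lxplus, forwarding local port 8080
matteo@MacBook ssh -L 8080:localhost:8080 your_user@lxplus.cern.ch
# hop 2: from lxplus to the VM, forwarding the same port again
your_user@lxplus ssh -L 8080:localhost:8080 root@my-ad-machine
```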
1. Create this folder and download the repository there:
```shell
root@my-ad-machine mkdir /opt/repositories/
root@my-ad-machine cd /opt/repositories/
root@my-ad-machine wget https://gitlab.cern.ch/cloud-infrastructure/data-analytics/-/archive/master/data-analytics-master.tar
root@my-ad-machine tar -xvf data-analytics-master.tar
root@my-ad-machine mv data-analytics-master data-analytics
```
1. Run the installation script. It installs the requirements, namely Docker
and Docker Compose, and creates the folders needed to store the data and the analysis results locally:
```shell
root@my-ad-machine ./install_for_centos.sh
```
1. Set your credentials (kerberos/kinit) in the secret.sh file. This is required to use Spark for the data preparation.
```shell
root@my-ad-machine cd /opt/repositories/data-analytics/pipelines/airflow-compose
root@my-ad-machine vi secret.sh
```
Substitute this_is_not_the_user and this_is_not_the_password with
your real username and password, respectively:
```bash
export KRB_USER=this_is_not_the_user
export KRB_USER_PASSW=this_is_not_the_password
```
1. Log in to the CERN registry to get the pre-built docker images (insert your kerberos/kinit password once again):
```shell
root@my-ad-machine docker login gitlab-registry.cern.ch
```
1. Start the Airflow docker compose:
```shell
root@my-ad-machine cd /opt/repositories/data-analytics/pipelines/airflow-compose
root@my-ad-machine ./example_start_docker_compose.sh
```
1. To publish anomalies to the MONIT infrastructure (Elasticsearch), you also have to launch the Fluentd container:
```shell
root@my-ad-machine cd /opt/repositories/data-analytics/pipelines/tasks
root@my-ad-machine ./OPEN_launch_fluentd.sh
```
1. Access the Airflow web interface via the address:
[http://localhost:8080/admin/](http://localhost:8080/admin/)
1. You can find your DAGs (Directed Acyclic Graphs) of tasks in the following folder:
```shell
root@my-ad-machine cd /opt/repositories/data-analytics/pipelines/airflow-compose/dags
```
1. You can now switch on one of the example DAGs and the anomaly detection should start.
# Troubleshooting
**PROBLEM: Some DAGs might crash because they use the localsparknotebook docker image, which is not present**
SOLUTION: You can build the localsparknotebook image with the following script:
```shell
root@my-ad-machine cd /opt/repositories/data-analytics/pipelines/tasks
root@my-ad-machine ./build_qa_locally.sh
```
**PROBLEM: Config data are not accessible to airflow because of ownership problems**
SOLUTION: change the ownership of the configuration folder to user 1000 (a hedged example is given after the block below)
```shell
root@my-ad-machine
```
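A hedged example, assuming the configuration files are the ones used in the old procedure below (under pipelines/tasks):
```shell
root@my-ad-machine cd /opt/repositories/data-analytics/pipelines/tasks
root@my-ad-machine chown 1000 auto_airflow_test.json auto_airflow_train.json
```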
## Old procedure
12. Go to pipelines/airflow-compose
13. Create (in pipelines/airflow-compose) never_commit.sh
14. Go to pipelines/tasks
15. Run the script to open the port for Spark:
./prepare_VM.sh
16. Log in to the CERN registry (your username and password)
docker login gitlab-registry.cern.ch
17. Go to pipelines/tasks
18. Create the notebook image:
./build_qa_locally.sh
19. Change the configuration file ownership (in folder pipelines/tasks):
20. chown 1000 auto_airflow_test.json
21. chown 1000 auto_airflow_train.json
22. chmod 666 auto_airflow_test.json
23. chmod 666 auto_airflow_train.json
24. Give docker high permissions to handle the sparknotebook container:
25. chmod 777 /var/run/docker.sock
26. Install cvmfs: (check below)
27. Change the version of fluentd in docker-compose-manual.yml (folder pipelines/manual_injection_MONIT)
Change fluentd:latest -> fluentd:v0.1
28. Launch fluentd:
./1_launch_fluentd.sh
29. Start the airflow docker-compose (in folder pipelines/airflow-compose):
./example_start_docker_compose.sh
\ No newline at end of file
python3 -m tox -ecover
\ No newline at end of file
echo "Move them"
cp notebooks_private/*.ipynb notebooks
echo "Standardize names"
for f in notebooks/*.ipynb
do
mv "$f" "${f// /_}"
done
echo "Reset the new ones"
jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace notebooks/*.ipynb
\ No newline at end of file
# Tests
- [anomaly_to_ES](#anomaly_to_ES)
- [grafana_etl](#grafana_etl)
- [spark_etl](#spark_etl)
This README describes the purpose and content of each test, and how to reproduce it outside CI.
These tests run in CI via the .gitlab-ci.yml;
therefore most of the indications about how to run them can be found in the dedicated jobs.
The tests are implemented so that they are easy to reproduce outside the CI, interactively,
in a local development area.
Unit tests run with `tox`, which is already installed in the image $CI_REGISTRY_IMAGE/tox:latest.
It is possible to run the same tests interactively, using the tox image and bind-mounting the repository:
```
docker run -v `pwd`:`pwd` -w `pwd` --rm -it $CI_REGISTRY_IMAGE/tox:latest
scl enable rh-python36 'tox -ecover'
```
## anomaly_to_ES
Docker compose configuration to start Elasticsearch, fluentd, and test the insertion of JSON anomaly reports into ES
More details in [anomaly_to_ES/README.md](anomaly_to_ES/README.md)
## grafana_etl
In addition to the unit tests (to be completed) running via tox, a demonstrator of how to run the grafana_etl library via
a jupyter notebook is provided ([test_ETL.ipynb](grafana_etl/test_ETL.ipynb)). This notebook shows how to use grafana_etl
and a simple declarative configuration (yml file) to extract data from InfluxDB and display it using pandas dataframes. A hedged sketch of how to open the notebook locally is given below.
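A hedged sketch of opening the notebook with the jupyter image built by this repository's CI; the image tag and the jupyter launch options are assumptions to adapt:
```
docker run --rm -it -p 8888:8888 -v `pwd`:`pwd` -w `pwd` $CI_REGISTRY_IMAGE/jupyter:latest \
    jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --allow-root
```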
## spark_etl
In addition to the unit tests (to be completed) running via tox, access to HDFS and Spark data using the spark_etl python module is
demonstrated here. Details are provided in the [spark_etl/README.md](spark_etl/README.md).
......@@ -4,4 +4,4 @@ Docker compose configuration to start Elasticsearch, fluentd, and test the inser
How to run interactively: read the header of the script `ci_run_script.sh`
The same script is used in the `CI`
\ No newline at end of file
The same script is used in the `CI`. Refer to the job `pipeline_anomaly_to_ES`
......@@ -37,15 +37,3 @@ echo LD_LIBRARY_PATH $LD_LIBRARY_PATH
echo JAVA_HOME $JAVA_HOME
echo SPARK_HOME $SPARK_HOME