Commit 15f77398 authored by Domenico Giordano

first integration test of adcern

parent d1dfa189
{
  "hostgroups": [
    "cloud_compute/level2/batch/gva_project_013"
  ],
  "code_project_name": "CI_test",
  "local_cache_folder": "/eos/project/i/it-cloud-data-analytics/CI/pre_alwyas_on/",
  "hdfs_out_folder": "/project/it_cloud_data_analytics/CI/raw_parquet_train/",
  "hdfs_cache_folder": "/project/it_cloud_data_analytics/CI/compressed_train/",
  "normalization_out_folder": "/project/it_cloud_data_analytics/CI/normalization",
  "overwrite_on_hdfs": true,
  "overwrite_normalization": true,
  "aggregate_every_n_minutes": 10,
  "history_steps": 48,
  "slide_steps": 1,
  "future_steps": 0,
  "date_start": "2021-01-03",
  "date_end_excluded": "2021-01-10",
  "date_start_normalization": "2021-01-03",
  "date_end_normalization_excluded": "2021-01-10",
  "selected_plugins": {
    "load_longterm": {
      "value_instance": "longterm",
      "plugin_name": "load"
    },
    "cpu__percent_idle": {
      "plugin_instance": "",
      "type": "percent",
      "type_instance": "idle",
      "plugin_name": "cpu"
    },
    "memory__memory_free": {
      "plugin_instance": "",
      "type": "memory",
      "type_instance": "free",
      "plugin_name": "memory"
    },
    "vmem__vmpage_io_memory_in": {
      "plugin_instance": "",
      "type": "vmpage_io",
      "type_instance": "memory",
      "value_instance": "in",
      "plugin_name": "vmem"
    },
    "swap_swapfile_swap_free": {
      "type": "swap",
      "type_instance": "free",
      "plugin_name": "swap"
    }
  }
}
#!/bin/bash -e
# This script is used by the GitLab CI of this repository to run a
# test of the adcern library functionalities
# Requirements:
# - spark_etl libs (distributed in docker image)
# - Kerberos authentication (passed via CI)
# - cvmfs spark libs (exposed via cvmfs)
# In order to access the needed libraries from cvmfs,
# a cvmfs service is started with docker-compose
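# (the compose file bind-mounts $CVMFSDIR into the containers with ':shared'
# propagation, so the repositories mounted by the cvmfs service become
# visible to the test container as well)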
#
# In order to run the same script manually, assuming only Docker is available, run:
#
# CI_PROJECT_DIR=`pwd | sed -e 's@/tests/.*@@'`
# docker run --rm -e CI_USER=$CI_USER -e CI_USER_PASSWD=$CI_USER_PASSWD -e CI_PROJECT_DIR=${CI_PROJECT_DIR} \
#     -v /tmp:/tmp -v /builds:/builds -v `pwd`:/work -v /var/run/docker.sock:/var/run/docker.sock \
#     gitlab-registry.cern.ch/cloud-infrastructure/data-analytics/compose:v1.0 /work/tests/adcern/integration/ci_test_script.sh
#
# Consider opening the Spark connection ports in iptables:
#
# sudo iptables -I INPUT -p tcp -m multiport --dports 5001:6000 -m comment --comment "00200 firewall for hadoop jobs" -j ACCEPT
# sudo iptables -I DOCKER-USER -p tcp -m multiport --dports 5001:6000 -m comment --comment "00200 firewall for hadoop jobs" -j ACCEPT
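#
# and, if needed, verify the rules are in place (optional check, not required by the CI):
#
# sudo iptables -S | grep '5001:6000'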
#
WORK_DIR=$(readlink -f "$(dirname "$0")")
echo WORK_DIR "$WORK_DIR"
cd "$WORK_DIR"
export CI_COMMIT_SHORT_SHA=${CI_COMMIT_SHORT_SHA:-noCI}
export CI_COMMIT_BRANCH=${CI_COMMIT_BRANCH:-noCI}
export COMMIT_TAG=${CI_COMMIT_TAG:-$CI_COMMIT_BRANCH}
export CVMFSDIR=/builds/cvmfs-${COMMIT_TAG}
export KRB5DIR=/builds/krb5-${COMMIT_TAG}
export IMAGE_TAG=${COMMIT_TAG}
mkdir -p $KRB5DIR
export KRB5CCNAME=$KRB5DIR/krb5cc_docker
kdestroy -c $KRB5CCNAME || true  # tolerate a missing cache on the first run (script runs with -e)
echo $CI_USER_PASSWD | kinit -c $KRB5CCNAME $CI_USER@CERN.CH
klist -c $KRB5CCNAME
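# Fail fast if the ticket was not obtained (an added sketch; 'klist -s' prints
# nothing and only reports the cache validity via its exit status)
klist -s -c $KRB5CCNAME || { echo "kinit failed for $CI_USER" >&2; exit 1; }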
ls -l $KRB5DIR
docker-compose -f docker-compose.yml down --remove-orphans --volumes
docker-compose pull
docker-compose -f docker-compose.yml -p adcern up --remove-orphans --renew-anon-volumes --abort-on-container-exit --exit-code-from srv_spark_etl
#docker-compose logs -f 2>&1 >> compose.log &
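# Possible cleanup once the test has finished (a sketch, not part of the original script):
# docker-compose -f docker-compose.yml -p adcern down --remove-orphans --volumes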
{
  "hostgroups": [
    "cloud_compute/level2/batch/gva_project_013"
  ],
  "code_project_name": "CI_test",
  "local_cache_folder": "/eos/project/i/it-cloud-data-analytics/CI/pre_alwyas_on/",
  "hdfs_out_folder": "/project/it_cloud_data_analytics/CI/essential_raw_parquet_train/",
  "hdfs_cache_folder": "/project/it_cloud_data_analytics/CI/essential_compressed_train/",
  "normalization_out_folder": "/project/it_cloud_data_analytics/CI/normalization",
  "overwrite_on_hdfs": true,
  "overwrite_normalization": true,
  "aggregate_every_n_minutes": 10,
  "history_steps": 48,
  "slide_steps": 1,
  "future_steps": 0,
  "date_start": "2021-01-03",
  "date_end_excluded": "2021-01-10",
  "date_start_normalization": "2021-01-03",
  "date_end_normalization_excluded": "2021-01-10",
  "selected_plugins": {
    "load_longterm": {
      "value_instance": "longterm",
      "plugin_name": "load"
    },
    "cpu__percent_idle": {
      "plugin_instance": "",
      "type": "percent",
      "type_instance": "idle",
      "plugin_name": "cpu"
    },
    "memory__memory_free": {
      "plugin_instance": "",
      "type": "memory",
      "type_instance": "free",
      "plugin_name": "memory"
    },
    "vmem__vmpage_io_memory_in": {
      "plugin_instance": "",
      "type": "vmpage_io",
      "type_instance": "memory",
      "value_instance": "in",
      "plugin_name": "vmem"
    },
    "swap_swapfile_swap_free": {
      "type": "swap",
      "type_instance": "free",
      "plugin_name": "swap"
    }
  }
}
# docker-compose pipeline to run pyspark test
# providing cvmfs via container service
# Use docker-compose variable substitution as from
# https://docs.docker.com/compose/compose-file/#variable-substitution
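# For example, ${IMAGE_TAG} below is replaced from the caller's environment;
# a form like ${IMAGE_TAG:-latest} would fall back to 'latest' when unset
# (illustration only: ci_test_script.sh always exports IMAGE_TAG)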
version: '3.2'
# this compose-file version is needed for the shared cvmfs bind mount, see
# https://github.com/moby/moby/issues/34936
services:
  srv_cvmfs:
    image: gitlab-registry.cern.ch/hep-benchmarks/hep-workloads-builder/cvmfs-image:qa
    command: -r sft.cern.ch -t /tmp/traces
    privileged: true
    #container_name: cnt_cvmfs_sft
    volumes:
      - $CVMFSDIR:/cvmfs:shared
    network_mode: host
    # This does not work:
    # https://docs.docker.com/compose/compose-file/
    # https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
    # - type: bind
    #   source: /tmp/cvmfs-stf
    #   target: /cvmfs
    #   propagation: shared
    # ERROR: The Compose file './docker-compose.yml' is invalid because:
    # services.srv_cvmfs.volumes contains unsupported option: 'propagation'
  srv_spark_etl:
    image: gitlab-registry.cern.ch/cloud-infrastructure/data-analytics/sparknotebook:${IMAGE_TAG}
    command: sh -c "echo 'waiting for /cvmfs to become available' && /usr/bin/sleep 30 && /work/tests/adcern/integration/test_adcern_cmd.sh"
    depends_on:
      - srv_cvmfs
    volumes:
      - $CVMFSDIR:/cvmfs:shared
      - $CI_PROJECT_DIR:/work
      - $KRB5DIR:$KRB5DIR
    environment:
      - KRB5CCNAME=${KRB5CCNAME}
      - CI_COMMIT_SHORT_SHA=${CI_COMMIT_SHORT_SHA}
    network_mode: host
#!/bin/bash
# This script should run in an environment (e.g. a docker container) configured
# to run jupyter notebooks and pyspark, and to connect to Spark
# E.g.
# export CVMFSDIR=/cvmfs
# export KRB5CCNAME=<kerberos active ticket cache>
# docker run -it --rm -e KRB5CCNAME=$KRB5CCNAME -v $CVMFSDIR:/cvmfs:shared -v /tmp:/tmp -v `pwd`:/work --net host gitlab-registry.cern.ch/cloud-infrastructure/data-analytics/sparknotebook:latest
function fail(){
echo -e "\n------------------------\nFailing '$@'\n------------------------\n" >&2
echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "\n$0 finished (NOT OK) at $(date)\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
exit 1
}
WORK_DIR=$(readlink -f "$(dirname "$0")")
echo WORK_DIR "$WORK_DIR"
[[ (-z "$KRB5CCNAME") || ( ! -e "$KRB5CCNAME") ]] && fail "Please export KRB5CCNAME pointing to a valid Kerberos ticket cache file. EXIT"
klist -c $KRB5CCNAME
echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "\nSetting hadoop libs\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
source set_spark_analytix.sh
echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "Verify Env\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
echo PYTHONPATH $PYTHONPATH
echo LD_LIBRARY_PATH $LD_LIBRARY_PATH
echo JAVA_HOME $JAVA_HOME
echo SPARK_HOME $SPARK_HOME
echo SPARK_DIST_CLASSPATH $SPARK_DIST_CLASSPATH
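# Sanity sketch (an addition, not in the original flow): verify the hdfs CLI
# resolved from the sourced environment is callable before running the HDFS
# commands below
which hdfs || fail 'hdfs command not found in PATH'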
HDFS_ADCERN_TEST_DIR=/project/it_cloud_data_analytics/CI/
echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "\ntest access to hdfs\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
hdfs dfs -ls ${HDFS_ADCERN_TEST_DIR}
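# NB: '-rm -R' exits non-zero when the directory is still absent; since this
# script does not use 'set -e', a failure here does not abort the first run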
hdfs dfs -rm -R ${HDFS_ADCERN_TEST_DIR}
echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "\ntest data_mining downloaddata\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
data_mining downloaddata --resource_file /work/adcern_cfg.json || fail 'test data_mining downloaddata'
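# A minimal follow-up check (a sketch, assuming python3 is available in the
# sparknotebook image): confirm the resource file is valid JSON and lists the
# expected collectd plugins
python3 -c "import json; print(sorted(json.load(open('/work/adcern_cfg.json'))['selected_plugins']))" \
    || fail 'adcern_cfg.json is not valid JSON or lacks selected_plugins'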