Commit 8b65bbf5 authored by Agata Malgorzata Chadaj

[SIGMON-150]

parent c9afbf16
FROM gitlab-registry.cern.ch/db/swan-spark-notebooks:latest
RUN yum install fuse-sshfs -y
RUN yum install krb5-workstation -y
RUN yum -y install libjpeg-turbo
CMD echo 'Container started!'
@Library('mpe-pipeline-utils') _
pipeline {
    agent {
        dockerfile {
            dir 'CI'
            args '--network host -v /cvmfs:/cvmfs:shared -v /root/.ssh/mpesoft_key:/root/.ssh/mpesoft_key:ro --privileged'
        }
    }
    options {
        buildDiscarder(logRotator(numToKeepStr: "32"))
        disableConcurrentBuilds()
    }
    stages {
        stage('Notebook run') {
            steps {
                sshMount('/user/mpesoft/', '/user/mpesoft/')
                kerberosAuth('mpesoft', '/user/mpesoft/mpesoft.keytab')
                sh 'chmod +x ./CI/env_setup.sh'
                sh 'chmod +x ./CI/setup.sh'
                sh "./CI/setup.sh ${params.notebook} ${params.hwc_test} ${params.circuit_name} ${params.campaign} \"${params.t_start}\" \"${params.t_end}\" ${params.API_BRANCH} ${params.NB_BRANCH}"
            }
        }
    }
    post {
        always {
            archiveArtifacts artifacts: 'output/*'
        }
        cleanup {
            cleanWs()
        }
    }
}
#!/bin/bash
### NXCALS configuration
export LCG_VIEW=/cvmfs/sft.cern.ch/lcg/views/LCG_100_nxcals/x86_64-centos7-gcc9-opt
source $LCG_VIEW/setup.sh
alias python="$LCG_VIEW/bin/python"
alias python3="$LCG_VIEW/bin/python3"
alias pip="$LCG_VIEW/bin/pip3.8"
alias pytest="$LCG_VIEW/bin/pytest"
source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH
export NXCALS_JARS=$(ls $LCG_VIEW/nxcals/nxcals_java/* | xargs | sed -e 's/ /:/g')
### install dependencies
export PYTHONPATH=/tmp/:$PYTHONPATH
pip install -t /tmp/ papermill
pip install -t /tmp/ ./papermill_nxcals/
pip install -t /tmp/ -r ./papermill_nxcals/requirements.txt
git clone --single-branch --branch "${API_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-api.git
git clone --single-branch --branch "${NB_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-notebook.git
pip install -t /tmp/ -r ./lhc-sm-api/swan-nxcals-spark3-requirements.txt
rsync -av ./lhc-sm-api/lhcsmapi/ /tmp/lhcsmapi
rsync -av ./lhc-sm-notebook/lhcsmnb/ /tmp/lhcsmnb
"""This script is a workaround as we didn't manage to install papermill as script in the Jenkins pipeline"""
import papermill as pm
import argparse
def _get_arg_parser():
"""Specifies script arguments. Returns parser.
"""
parser = argparse.ArgumentParser(description='Runs notebook with papermill.')
parser.add_argument('notebook')
parser.add_argument('hwc_test')
parser.add_argument('circuit_name')
parser.add_argument('campaign')
parser.add_argument('t_start')
parser.add_argument('t_end')
return parser
if __name__ == '__main__':
args = _get_arg_parser().parse_args()
pm.execute_notebook(
args.notebook,
'./output/result.ipynb',
parameters={'hwc_test': args.hwc_test,
'circuit_name': args.circuit_name,
'campaign': args.campaign,
't_start': args.t_start,
't_end': args.t_end
},
engine_name='nxcals_engine'
)
@@ -4,29 +4,10 @@
echo "$EOS_ACCOUNT_PASSWORD" | kinit -c $KRB5CCNAME "$EOS_ACCOUNT_USERNAME@CERN.CH"
yum -y install libjpeg-turbo
### NXCALS configuration
export LCG_VIEW=/cvmfs/sft.cern.ch/lcg/views/LCG_100_nxcals/x86_64-centos7-gcc9-opt
source $LCG_VIEW/setup.sh
alias python="$LCG_VIEW/bin/python"
alias python3="$LCG_VIEW/bin/python3"
alias pip="$LCG_VIEW/bin/pip3.8"
alias pytest="$LCG_VIEW/bin/pytest"
source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH
export NXCALS_JARS=$(ls $LCG_VIEW/nxcals/nxcals_java/* | xargs | sed -e 's/ /:/g')
source ./CI/env_setup.sh
### install dependencies
pip list
export PYTHONPATH=/tmp/:$PYTHONPATH
pip install -t /tmp/ papermill
pip install -t /tmp/ -r ./papermill_nxcals/requirements.txt
pip install -t /tmp/ ./papermill_nxcals/
git clone --single-branch --branch "${API_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-api.git
git clone --single-branch --branch "${NB_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-notebook.git
pip install -t /tmp/ -r ./lhc-sm-api/swan-nxcals-spark3-requirements.txt
rsync -av ./lhc-sm-api/lhcsmapi/ /tmp/lhcsmapi
rsync -av ./lhc-sm-notebook/lhcsmnb/ /tmp/lhcsmnb
### copy HWC_Summary.csv
mkdir -p /eos/project/l/lhcsm/hwc/
@@ -36,4 +17,4 @@ xrdcp root://eosuser.cern.ch//eos/project/l/lhcsm/hwc/HWC_Summary.csv /eos/proje
mkdir ./results
### run notebooks
python -m pytest test -n=4 --junitxml=report.xml
python3 -m pytest test -n=4 --junitxml=report.xml
#!/bin/bash
export API_BRANCH="$7"
export NB_BRANCH="$8"
yum -y install libjpeg-turbo
source ./CI/env_setup.sh
### copy HWC_Summary.csv
mkdir -p /eos/project/l/lhcsm/hwc/
rsync -aR ./CI/HWC_Summary.csv /eos/project/l/lhcsm/hwc/HWC_Summary.csv
### run the notebook
mkdir -p output
python3 ./CI/jenkins.py "$1" "$2" "$3" "$4" "$5" "$6"
## Running notebooks on Jenkins
A pipeline has been created that allows us to run a single notebook:
http://mpe-jenkins.cern.ch/view/Playground/job/SIGMON/
The pipeline accepts the following parameters:
* Notebook path (e.g. `ipq/AN_IPQ_PIC2.ipynb`)
* HWC test (e.g. `PIC2`)
* Circuit name (e.g. `RQ10.R2`)
* Campaign (e.g. `HWC_2018_1`)
* Start and end time (in the format `2018-03-15 15:34:40.636`)
* Branch of lhc-sm-api to be used for running the pipeline (default: `dev`)
* Branch of lhc-sm-notebook to be used for running the pipeline (default: `dev`)
![jenkins.png](jenkins.png)
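These parameters are passed straight through to `CI/setup.sh` (see the Jenkinsfile). For illustration, a direct invocation with the example values above could look like this (the end time here is a made-up value):
```
./CI/setup.sh ipq/AN_IPQ_PIC2.ipynb PIC2 RQ10.R2 HWC_2018_1 \
    "2018-03-15 15:34:40.636" "2018-03-15 16:34:40.636" dev dev
```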
## Pipeline environment
Currently, the pipeline is running on mpe-jenkins-agent-2.cern.ch.
Spark is set up to run in YARN mode, which requires some additional parameters configured in `papermill_nxcals/src/resources/manual_spark_config.py`:
```
conf.set('spark.master', 'yarn')
conf.set("spark.driver.host", host_name)
conf.set("spark.driver.port", '5001')
conf.set("spark.blockManager.port", '5101')
conf.set("spark.ui.port", '5201')
```
In order for the setup to work, the listed ports must be open on the host machine. This can be done by executing the following commands:
```
sudo firewall-cmd --add-port=5001/tcp
sudo firewall-cmd --add-port=5101/tcp
sudo firewall-cmd --add-port=5201/tcp
sudo firewall-cmd --runtime-to-permanent
```
The environment in which the notebook runs matches the one used in SWAN.
This is ensured by sourcing the NXCALS configuration from CVMFS
(`source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals`) and installing all the recent dependencies listed in `CI/setup.sh`.
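Concretely, the SWAN-like environment boils down to a few lines from `CI/env_setup.sh` (excerpt):
```
export LCG_VIEW=/cvmfs/sft.cern.ch/lcg/views/LCG_100_nxcals/x86_64-centos7-gcc9-opt
source $LCG_VIEW/setup.sh
source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH
```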
The pipeline runs in a Docker container, started by the following agent configuration:
```
agent {
    dockerfile {
        dir 'CI'
        args '--network host -v /cvmfs:/cvmfs:shared -v /root/.ssh/mpesoft_key:/root/.ssh/mpesoft_key:ro --privileged'
    }
}
```
The run arguments are important here: `--network host` makes the container use the host's network stack, while the volume mounts provide CVMFS in shared mode and the mpesoft key inside the container.
CVMFS needs to be enabled with locmap on the machine that executes the pipeline, using the command `locmap --enable cvmfs`.
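For reference, the agent configuration above corresponds roughly to the following standalone `docker run` command (a sketch; the image tag is hypothetical, since Jenkins builds the image from `CI/Dockerfile` itself):
```
docker run --rm --privileged --network host \
    -v /cvmfs:/cvmfs:shared \
    -v /root/.ssh/mpesoft_key:/root/.ssh/mpesoft_key:ro \
    sigmon-ci:latest
```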
@@ -4,29 +4,43 @@ Used unless the spark context is already created. (outside of SWAN service or py
"""
if 'spark' not in locals() and 'spark' not in globals():
import os
import socket
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
nxcals_jars = os.getenv('NXCALS_JARS')
host_name = 'spark-runner.cern.ch' if os.environ.get('CI', 'false') == 'true' else socket.gethostname()
conf = SparkConf()
conf.set('spark.master', 'yarn')
conf.set("spark.driver.host", "spark-runner.cern.ch")
conf.set("spark.driver.host", host_name)
conf.set("spark.driver.port", '5001')
conf.set("spark.blockManager.port", '5101')
conf.set("spark.ui.port", '5201')
# adjust PYTHONPATH and LD_LIBRARY_PATH on driver
conf.set('spark.yarn.appMasterEnv.PYTHONPATH', os.getenv('PYTHONPATH'))
conf.set('spark.yarn.appMasterEnv.LD_LIBRARY_PATH', os.getenv('LD_LIBRARY_PATH'))
# adjust PYTHONPATH and LD_LIBRARY_PATH on executor
conf.set('spark.executorEnv.PYTHONPATH', os.getenv('PYTHONPATH'))
conf.set('spark.executorEnv.LD_LIBRARY_PATH', os.getenv('LD_LIBRARY_PATH'))
conf.set('spark.executorEnv.JAVA_HOME', os.getenv('JAVA_HOME'))
conf.set('spark.executorEnv.SPARK_HOME', os.getenv('SPARK_HOME'))
conf.set('spark.executorEnv.SPARK_EXTRA_CLASSPATH', os.getenv('SPARK_DIST_CLASSPATH'))
conf.set('spark.driver.extraClassPath', nxcals_jars)
conf.set('spark.executor.extraClassPath', nxcals_jars)
conf.set('spark.driver.extraJavaOptions',
"-Dservice.url=https://cs-ccr-nxcals5.cern.ch:19093,https://cs-ccr-nxcals5.cern.ch:19094,"
"-Dservice.url="
"https://cs-ccr-nxcals5.cern.ch:19093,https://cs-ccr-nxcals5.cern.ch:19094,"
"https://cs-ccr-nxcals6.cern.ch:19093,https://cs-ccr-nxcals6.cern.ch:19094,"
"https://cs-ccr-nxcals7.cern.ch:19093,https://cs-ccr-nxcals7.cern.ch:19094,"
"https://cs-ccr-nxcals8.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19094,"
"https://cs-ccr-nxcalsstr4.cern.ch:19093,https://cs-ccr-nxcalsstr5.cern.ch:19093")
"https://cs-ccr-nxcals8.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19094")
sc = SparkContext(conf=conf)
spark = SparkSession(sc)