Commit d9e022de authored by Aleksandra Mnich

Merge branch 'dev' of ssh://gitlab.cern.ch:7999/LHCData/lhc-sm-hwc into SIGMON-222_support_many_ee_references
parents 1739e29f cc09f06e
Pipeline #3192348 failed in 88 minutes and 38 seconds
@@ -100,4 +100,5 @@ notebooks_exec:
- docker
only:
- pipelines
- web
\ No newline at end of file
- web
- schedules
\ No newline at end of file
FROM gitlab-registry.cern.ch/db/swan-spark-notebooks:latest
RUN yum install fuse-sshfs -y
RUN yum install krb5-workstation -y
RUN yum -y install libjpeg-turbo
CMD echo 'Container started!'
\ No newline at end of file
This diff is collapsed.
@Library('mpe-pipeline-utils') _
pipeline {
agent {
dockerfile {
dir 'CI'
args '--network host -v /cvmfs:/cvmfs:shared -v /root/.ssh/mpesoft_key:/root/.ssh/mpesoft_key:ro --privileged'
}
}
options {
buildDiscarder(logRotator(numToKeepStr: "32"))
disableConcurrentBuilds()
}
stages {
stage('Notebook run') {
steps {
sshMount('/user/mpesoft/', '/user/mpesoft/')
kerberosAuth('mpesoft', '/user/mpesoft/mpesoft.keytab')
sh 'chmod +x ./CI/env_setup.sh'
sh 'chmod +x ./CI/setup.sh'
sh "./CI/setup.sh ${params.notebook} ${params.hwc_test} ${params.circuit_name} ${params.campaign} \"${params.t_start}\" \"${params.t_end}\" ${params.API_BRANCH} ${params.NB_BRANCH}"
}
}
}
post {
always {
archiveArtifacts artifacts: 'output/*'
}
cleanup {
cleanWs()
}
}
}
#!/bin/bash
### NXCALS configuration
export LCG_VIEW=/cvmfs/sft.cern.ch/lcg/views/LCG_100_nxcals/x86_64-centos7-gcc9-opt
source $LCG_VIEW/setup.sh
alias python="$LCG_VIEW/bin/python"
alias python3="$LCG_VIEW/bin/python3"
alias pip="$LCG_VIEW/bin/pip3.8"
alias pytest="$LCG_VIEW/bin/pytest"
source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH
export NXCALS_JARS=$(ls $LCG_VIEW/nxcals/nxcals_java/* | xargs | sed -e 's/ /:/g')
### install dependencies
export PYTHONPATH=/tmp/:$PYTHONPATH
pip install -t /tmp/ papermill
pip install -t /tmp/ ./papermill_nxcals/
pip install -t /tmp/ -r ./papermill_nxcals/requirements.txt
git clone --single-branch --branch "${API_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-api.git
git clone --single-branch --branch "${NB_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-notebook.git
pip install -t /tmp/ -r ./lhc-sm-api/swan-nxcals-spark3-requirements.txt
rsync -av ./lhc-sm-api/lhcsmapi/ /tmp/lhcsmapi
rsync -av ./lhc-sm-notebook/lhcsmnb/ /tmp/lhcsmnb
\ No newline at end of file
"""This script is a workaround as we didn't manage to install papermill as script in the Jenkins pipeline"""
import papermill as pm
import argparse
def _get_arg_parser():
"""Specifies script arguments. Returns parser.
"""
parser = argparse.ArgumentParser(description='Runs notebook with papermill.')
parser.add_argument('notebook')
parser.add_argument('hwc_test')
parser.add_argument('circuit_name')
parser.add_argument('campaign')
parser.add_argument('t_start')
parser.add_argument('t_end')
return parser
if __name__ == '__main__':
args = _get_arg_parser().parse_args()
pm.execute_notebook(
args.notebook,
'./output/result.ipynb',
parameters={'hwc_test': args.hwc_test,
'circuit_name': args.circuit_name,
'campaign': args.campaign,
't_start': args.t_start,
't_end': args.t_end
},
engine_name='nxcals_engine'
)
@@ -4,29 +4,10 @@
echo "$EOS_ACCOUNT_PASSWORD" | kinit -c $KRB5CCNAME "$EOS_ACCOUNT_USERNAME@CERN.CH"
yum -y install libjpeg-turbo
### NXCALS configuration
export LCG_VIEW=/cvmfs/sft.cern.ch/lcg/views/LCG_100_nxcals/x86_64-centos7-gcc9-opt
source $LCG_VIEW/setup.sh
alias python="$LCG_VIEW/bin/python"
alias python3="$LCG_VIEW/bin/python3"
alias pip="$LCG_VIEW/bin/pip3.8"
alias pytest="$LCG_VIEW/bin/pytest"
source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH
export NXCALS_JARS=$(ls $LCG_VIEW/nxcals/nxcals_java/* | xargs | sed -e 's/ /:/g')
source ./CI/env_setup.sh
### install dependencies
pip list
export PYTHONPATH=/tmp/:$PYTHONPATH
pip install -t /tmp/ papermill
pip install -t /tmp/ -r ./papermill_nxcals/requirements.txt
pip install -t /tmp/ ./papermill_nxcals/
git clone --single-branch --branch "${API_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-api.git
git clone --single-branch --branch "${NB_BRANCH:-dev}" https://:@gitlab.cern.ch:8443/LHCData/lhc-sm-notebook.git
pip install -t /tmp/ -r ./lhc-sm-api/swan-nxcals-spark3-requirements.txt
rsync -av ./lhc-sm-api/lhcsmapi/ /tmp/lhcsmapi
rsync -av ./lhc-sm-notebook/lhcsmnb/ /tmp/lhcsmnb
### copy HWC_Summary.csv
mkdir -p /eos/project/l/lhcsm/hwc/
@@ -36,4 +17,4 @@ xrdcp root://eosuser.cern.ch//eos/project/l/lhcsm/hwc/HWC_Summary.csv /eos/proje
mkdir ./results
### run notebooks
python -m pytest test -n=4 --junitxml=report.xml
\ No newline at end of file
python3 -m pytest test -n=4 --junitxml=report.xml
\ No newline at end of file
#!/bin/bash
export API_BRANCH="$7"
export NB_BRANCH="$8"
yum -y install libjpeg-turbo
source ./CI/env_setup.sh
### copy HWC_Summary.csv
mkdir -p /eos/project/l/lhcsm/hwc/
rsync -aR ./CI/HWC_Summary.csv /eos/project/l/lhcsm/hwc/HWC_Summary.csv
### run the notebook
mkdir -p output
python3 ./CI/jenkins.py "$1" "$2" "$3" "$4" "$5" "$6"
## Running notebooks on Jenkins
A pipeline has been set up that allows running a single notebook.
http://mpe-jenkins.cern.ch/view/Playground/job/SIGMON/
The pipeline accepts the following parameters (a triggering sketch follows the screenshot below):
* Notebook path (e.g. `ipq/AN_IPQ_PIC2.ipynb`)
* HWC test (e.g. `PIC2`)
* Circuit name (e.g. `RQ10.R2`)
* Campaign (e.g. `HWC_2018_1`)
* Start and end time (in the format `2018-03-15 15:34:40.636`)
* Branch of lhc-sm-api to be used for running the pipeline (default `dev`)
* Branch of lhc-sm-notebook to be used for running the pipeline (default `dev`)
![jenkins.png](jenkins.png)
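For reference, below is a minimal sketch (not part of the repository) of triggering the job through Jenkins' standard remote build API, using the parameter names from the Jenkinsfile. The job URL, user name and API token are placeholders and need to be adapted:
```
import requests

# Placeholder job URL and credentials; the parameter names match params.* in the Jenkinsfile.
JOB_URL = 'http://mpe-jenkins.cern.ch/job/SIGMON/buildWithParameters'
parameters = {
    'notebook': 'ipq/AN_IPQ_PIC2.ipynb',
    'hwc_test': 'PIC2',
    'circuit_name': 'RQ10.R2',
    'campaign': 'HWC_2018_1',
    't_start': '2018-03-15 15:34:40.636',
    't_end': '2018-03-15 16:34:40.636',
    'API_BRANCH': 'dev',
    'NB_BRANCH': 'dev',
}
# Jenkins' remote build API accepts the parameters as query arguments;
# authentication uses a user name and API token.
response = requests.post(JOB_URL, params=parameters, auth=('jenkins-user', 'api-token'))
response.raise_for_status()
```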
## Pipeline environment
Currently, the pipeline is running on mpe-jenkins-agent-2.cern.ch.
Spark is set up to run in YARN mode, which requires some additional parameters configured in `papermill_nxcals/src/resources/manual_spark_config.py`:
```
conf.set('spark.master', 'yarn')
conf.set("spark.driver.host", host_name)
conf.set("spark.driver.port", '5001')
conf.set("spark.blockManager.port", '5101')
conf.set("spark.ui.port", '5201')
```
For this setup to work, the listed ports must be open on the host machine, which can be done by executing the following commands:
```
sudo firewall-cmd --add-port=5001/tcp
sudo firewall-cmd --add-port=5101/tcp
sudo firewall-cmd --add-port=5201/tcp
sudo firewall-cmd --runtime-to-permanent
```
The environment in which the notebook runs matches the one used in SWAN.
This is ensured by sourcing the NXCALS configuration from CVMFS
(`source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals`) and installing the latest dependencies listed in `CI/setup.sh`.
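A quick, hypothetical check (not part of the repository) that the environment is set up as in SWAN is to inspect the NXCALS class path exported by `CI/env_setup.sh` and the installed API version, using the helper already called in the notebooks:
```
import os
import lhcsmapi

# NXCALS_JARS is exported by CI/env_setup.sh; an empty value means the setup was not sourced.
print(os.environ.get('NXCALS_JARS', 'NXCALS_JARS is not set'))
# Version helper provided by lhc-sm-api, as used in the notebooks.
print(lhcsmapi.get_lhcsmapi_version())
```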
The pipeline starts a docker container:
```
agent {
dockerfile {
dir 'CI'
args '--network host -v /cvmfs:/cvmfs:shared -v /root/.ssh/mpesoft_key:/root/.ssh/mpesoft_key:ro --privileged'
}
}
```
The run arguments matter here: `--network host` makes the container use the host's network stack, `/cvmfs` is mounted in shared mode so CVMFS is available inside the container, and the mpesoft SSH key is mounted read-only.
CVMFS needs to be enabled with locmap on the machine that executes the pipeline, using the command `locmap --enable cvmfs`.
@@ -4,29 +4,43 @@ Used unless the spark context is already created. (outside of SWAN service or py
"""
if 'spark' not in locals() and 'spark' not in globals():
import os
import socket
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
nxcals_jars = os.getenv('NXCALS_JARS')
host_name = 'spark-runner.cern.ch' if os.environ.get('CI', 'false') == 'true' else socket.gethostname()
conf = SparkConf()
conf.set('spark.master', 'yarn')
conf.set("spark.driver.host", "spark-runner.cern.ch")
conf.set("spark.driver.host", host_name)
conf.set("spark.driver.port", '5001')
conf.set("spark.blockManager.port", '5101')
conf.set("spark.ui.port", '5201')
# adjust PYTHONPATH and LD_LIBRARY_PATH on driver
conf.set('spark.yarn.appMasterEnv.PYTHONPATH', os.getenv('PYTHONPATH'))
conf.set('spark.yarn.appMasterEnv.LD_LIBRARY_PATH', os.getenv('LD_LIBRARY_PATH'))
# adjust PYTHONPATH and LD_LIBRARY_PATH on executor
conf.set('spark.executorEnv.PYTHONPATH', os.getenv('PYTHONPATH'))
conf.set('spark.executorEnv.LD_LIBRARY_PATH', os.getenv('LD_LIBRARY_PATH'))
conf.set('spark.executorEnv.JAVA_HOME', os.getenv('JAVA_HOME'))
conf.set('spark.executorEnv.SPARK_HOME', os.getenv('SPARK_HOME'))
conf.set('spark.executorEnv.SPARK_EXTRA_CLASSPATH', os.getenv('SPARK_DIST_CLASSPATH'))
conf.set('spark.driver.extraClassPath', nxcals_jars)
conf.set('spark.executor.extraClassPath', nxcals_jars)
conf.set('spark.driver.extraJavaOptions',
"-Dservice.url=https://cs-ccr-nxcals5.cern.ch:19093,https://cs-ccr-nxcals5.cern.ch:19094,"
"-Dservice.url="
"https://cs-ccr-nxcals5.cern.ch:19093,https://cs-ccr-nxcals5.cern.ch:19094,"
"https://cs-ccr-nxcals6.cern.ch:19093,https://cs-ccr-nxcals6.cern.ch:19094,"
"https://cs-ccr-nxcals7.cern.ch:19093,https://cs-ccr-nxcals7.cern.ch:19094,"
"https://cs-ccr-nxcals8.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19094,"
"https://cs-ccr-nxcalsstr4.cern.ch:19093,https://cs-ccr-nxcalsstr5.cern.ch:19093")
"https://cs-ccr-nxcals8.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19094")
sc = SparkContext(conf=conf)
spark = SparkSession(sc)
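As a sanity check (not part of the change above), once this module has created the `spark` session, a trivial job can confirm that the YARN connection and the fixed driver ports work, for example:
```
# Assumes `spark` was created by the configuration above.
df = spark.range(1000)
assert df.count() == 1000
# The Spark web UI should be served on the configured port 5201 of the driver host.
print(spark.sparkContext.uiWebUrl)
```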
This diff is collapsed.
This diff is collapsed.
%% Cell type:markdown id: tags:
%% Cell type:code id:2b3664d3 tags:
``` python
"""Manual spark configuration based on the default Spark configuration from the NXCALS bundle
and https://gitlab.cern.ch/msobiesz/spark-pipelines/-/blob/master/NXCALS-example-for-DAaaS.ipynb.
Used unless the spark context is already created. (outside of SWAN service or pyspark)
"""
if 'spark' not in locals() and 'spark' not in globals():
import os
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
nxcals_jars = os.getenv('NXCALS_JARS')
conf = SparkConf()
conf.set('spark.master', 'yarn')
conf.set("spark.driver.host", "spark-runner.cern.ch")
conf.set("spark.driver.port", '5001')
conf.set("spark.blockManager.port", '5101')
conf.set("spark.ui.port", '5201')
conf.set('spark.executorEnv.PYTHONPATH', os.getenv('PYTHONPATH'))
conf.set('spark.executorEnv.LD_LIBRARY_PATH', os.getenv('LD_LIBRARY_PATH'))
conf.set('spark.executorEnv.JAVA_HOME', os.getenv('JAVA_HOME'))
conf.set('spark.executorEnv.SPARK_HOME', os.getenv('SPARK_HOME'))
conf.set('spark.executorEnv.SPARK_EXTRA_CLASSPATH', os.getenv('SPARK_DIST_CLASSPATH'))
conf.set('spark.driver.extraClassPath', nxcals_jars)
conf.set('spark.executor.extraClassPath', nxcals_jars)
conf.set('spark.driver.extraJavaOptions',
"-Dservice.url=https://cs-ccr-nxcals5.cern.ch:19093,https://cs-ccr-nxcals5.cern.ch:19094,"
"https://cs-ccr-nxcals6.cern.ch:19093,https://cs-ccr-nxcals6.cern.ch:19094,"
"https://cs-ccr-nxcals7.cern.ch:19093,https://cs-ccr-nxcals7.cern.ch:19094,"
"https://cs-ccr-nxcals8.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19094,"
"https://cs-ccr-nxcalsstr4.cern.ch:19093,https://cs-ccr-nxcalsstr5.cern.ch:19093")
sc = SparkContext(conf=conf)
spark = SparkSession(sc)
```
%% Cell type:markdown id:86e93e96 tags:
# Notebook to list QHD PM timestamps by circuit type and time range
%% Cell type:markdown id: tags:
%% Cell type:markdown id:6e01e66c tags:
# 0. Initialise Working Environment
%% Cell type:code id: tags:
%% Cell type:code id:ebe73a7a tags:
``` python
import os, sys, warnings
import pandas as pd
from IPython.display import display, HTML, Javascript, clear_output, Markdown
@@ -28,15 +66,15 @@
analysis_start_time = Time.get_analysis_start_time()
lhcsmapi.get_lhcsmapi_version()
lhcsmapi.get_lhcsmhwc_version('../__init__.py')
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id:61c76b35 tags:
# 1. User Input (circuit types: RB, RQ, IPQ, IPD, IT)
%% Cell type:code id: tags:
%% Cell type:code id:2d5e6653 tags:parameters
``` python
detailed_circuit_types = {
'RB': ['RB'],
'RQ': ['RQ'],
@@ -48,22 +86,32 @@
start_time = '2021-10-18 07:00:00'
stop_time = '2021-10-20 23:01:00'
```
%% Cell type:code id: tags:
%% Cell type:code id:b095af0e tags:injected-parameters
``` python
# Parameters
start_time = "2021-10-18 07:00:00"
stop_time = "2021-10-20 23:01:00"
parametrized_marker = None
```
%% Cell type:code id:8f3fb7b2 tags:
``` python
print('start_time = ', start_time)
print('stop_time = ', stop_time)
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id:5ad1ec68 tags:
# 2. Search for PMs
%% Cell type:code id: tags:
%% Cell type:code id:09ed9696 tags:
``` python
source_timestamp_qds_df = pd.DataFrame()
for circuit_type in detailed_circuit_types:
@@ -72,16 +120,13 @@
if circuit_type == 'RQ':
circuits = circuits[0:7]
for circuit_name in circuits:
meta_circuit_type = circuit_type
if circuit_type == 'IPQ':
meta_circuit_type = SignalMetadata.get_circuit_type_for_circuit_name(circuit_name)
elif circuit_type == 'IPD':
if circuit_type == 'IPQ' or circuit_type == 'IPD':
meta_circuit_type = SignalMetadata.get_circuit_type_for_circuit_name(circuit_name)
source_timestamp_qds_df_i = QueryBuilder().with_pm() \
.with_duration(t_start=start_time, t_end=stop_time) \
.with_circuit_type(meta_circuit_type) \
.with_metadata(circuit_name=circuit_name, system='QH', source='*') \
.event_query() \
@@ -95,34 +140,33 @@
if source_timestamp_qds_df.empty == False:
source_timestamp_qds_df['datetime'] = source_timestamp_qds_df.apply(lambda row: Time.to_string(row['timestamp']), axis=1)
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id:763ab95e tags:
# 3. The list of QHD PM Timestamps, if any
%% Cell type:code id: tags:
%% Cell type:code id:53accbff tags:
``` python
if source_timestamp_qds_df.empty == False:
display(source_timestamp_qds_df)
warnings.warn('WARNING: To be checked that QH discharges has been accepted by QHDA-notebooks!', stacklevel=2)
warnings.warn('WARNINIG: To be checked that QH discharges has been accepted by QHDA-notebooks!', stacklevel=2)
else:
print('There were no QH discharges on selected time range!')
```
%%%% Output: display_data
%% Cell type:markdown id: tags:
%% Cell type:markdown id:ef492ad3 tags:
# 4. Save html-report
%% Cell type:code id: tags:
%% Cell type:raw id:223bacf3 tags:ignore
``` python
if source_timestamp_qds_df.empty == False:
# pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
analysis_start_time = Time.get_analysis_start_time()
date_time_qhd_pm_list = Time.to_datetime(start_time).strftime("%Y-%m-%d-%Hh%M")
@@ -134,16 +178,6 @@
file_name_html = file_name + '.html'
full_path = '/eos/project/m/mp3/LHC_QHs/{}'.format(file_name_html)
print('Compact notebook report saved to (Windows): ' + '\\\\cernbox-smb' + full_path.replace('/', '\\'))
display(Javascript('IPython.notebook.save_notebook();'))
Time.sleep(5)
#!{sys.executable} -m jupyter nbconvert --to html $'HWC_QHD_PM_LIST.ipynb' --output /eos/project/m/mp3/LHC_QHs/$file_name_html --TemplateExporter.exclude_input=True --TagRemovePreprocessor.remove_all_outputs_tags='["skip_output"]' --TagRemovePreprocessor.remove_cell_tags='["skip_cell"]'
```
%%%% Output: display_data
%%%% Output: display_data
%%%% Output: display_data
!{sys.executable} -m jupyter nbconvert --to html $'HWC_QHD_PM_LIST.ipynb' --output /eos/project/m/mp3/LHC_QHs/$file_name_html --TemplateExporter.exclude_input=True --TagRemovePreprocessor.remove_all_outputs_tags='["skip_output"]' --TagRemovePreprocessor.remove_cell_tags='["skip_cell"]'