Commit 1a7d93ef authored by Michal Maciejewski

Added means of executing a SWAN notebook on demand

parent 0bcdd932
@@ -2,7 +2,6 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
# parameters: argv[1] is the notebook name without extension;
# output names are prefixed with a Unix timestamp
import sys
import time

ts = str(int(time.time()))
name = sys.argv[1]
notebook = name + '.ipynb'
output_ipynb = ts + '-' + name + '.ipynb'
output_pdf = ts + '-' + name + '.pdf'
# execute the notebook, injecting parameters into the cell tagged "parameters"
import papermill as pm
pm.execute_notebook(
    notebook,
    output_ipynb,
    parameters=dict(end_time="2018-10-26 00:00:00.000")
)
# save to pdf, hiding the input cells
import nbformat
from nbconvert import PDFExporter

with open(output_ipynb) as f:
    nb = nbformat.read(f, as_version=4)
pdf_exporter = PDFExporter()
pdf_exporter.exclude_input = True
pdf_data, resources = pdf_exporter.from_notebook_node(nb)
with open(output_pdf, "wb") as f:
    f.write(pdf_data)
# copy the PDF report to EOS over XRootD
import os
import subprocess

remote_dir = 'root://eosuser.cern.ch/' + os.environ['OUTPUT_DIR'].rstrip('/') + '/'
remote_file = remote_dir + output_pdf
subprocess.call(['xrdcp', output_pdf, remote_file])
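A minimal sketch of an on-demand invocation of the script above, assuming it is saved as execute_notebook.py (the script name, notebook name, and EOS path are illustrative, not part of this commit):
# executes monitor.ipynb via papermill and copies the timestamped PDF to EOS
OUTPUT_DIR=/eos/user/e/example/reports/ python execute_notebook.py monitor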
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
log4j.rootCategory=ERROR, console
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=ERROR
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=ERROR
log4j.logger.org.apache.spark.api.python.PythonGatewayServer=ERROR
log4j.logger.org.apache.spark=ERROR, console
from cern.nxcals.pyquery.builders import *
import seaborn as sn
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime, timezone, date, timedelta
from pyspark.sql import SQLContext
from pyspark.sql.functions import pandas_udf, PandasUDFType
# Stop spark session
try:
    spark.stop()
except NameError:
    pass
# Manual Spark configuration to execute the notebook outside of the SWAN service
import os
import random
import subprocess
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

if "SPARK_PORTS" in os.environ:
    # comma-separated list of three ports: driver, block manager, web UI
    ports = os.getenv("SPARK_PORTS").split(",")
else:
    ports = [random.randrange(5001, 5300) for _ in range(3)]

# TODO: change to SPARK_MASTER_IP
# colon-separated classpath of the NXCALS jars shipped with the LCG view
nxcals_jars = subprocess.run(
    'ls $LCG_VIEW/nxcals/nxcals_java/* | xargs | sed -e "s/ /:/g"',
    shell=True, stdout=subprocess.PIPE, env=os.environ
).stdout.decode('utf-8').strip()
conf = SparkConf()
conf.set('spark.master', 'yarn')
# authenticate and encrypt traffic between driver and executors
conf.set('spark.authenticate', True)
conf.set('spark.network.crypto.enabled', True)
conf.set('spark.authenticate.enableSaslEncryption', True)
conf.set("spark.logConf", True)
# the driver runs outside the cluster, so executors must be able to reach it explicitly
conf.set("spark.driver.host", os.environ.get('SERVER_HOSTNAME'))
conf.set("spark.driver.port", ports[0])
conf.set("spark.blockManager.port", ports[1])
conf.set("spark.ui.port", ports[2])
# propagate the LCG view environment to the executors
conf.set('spark.executorEnv.PYTHONPATH', os.environ.get('PYTHONPATH'))
conf.set('spark.executorEnv.LD_LIBRARY_PATH', os.environ.get('LD_LIBRARY_PATH'))
conf.set('spark.executorEnv.JAVA_HOME', os.environ.get('JAVA_HOME'))
conf.set('spark.executorEnv.SPARK_HOME', os.environ.get('SPARK_HOME'))
conf.set('spark.executorEnv.SPARK_EXTRA_CLASSPATH', os.environ.get('SPARK_DIST_CLASSPATH'))
#conf.set('spark.executorEnv.HADOOP_TOKEN_FILE_LOCATION', "/spark/hadoop.toks")
# NXCALS jars plus service discovery and TLS trust store for the NXCALS API
conf.set('spark.driver.extraClassPath', nxcals_jars)
conf.set('spark.executor.extraClassPath', nxcals_jars)
conf.set('spark.driver.extraJavaOptions',"-Dlog4j.configuration=file:/eos/project/s/swan/public/NXCals/log4j_conf -Dservice.url=https://cs-ccr-nxcals6.cern.ch:19093,https://cs-ccr-nxcals7.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19093 -Djavax.net.ssl.trustStore=/etc/pki/tls/certs/truststore.jks -Djavax.net.ssl.trustStorePassword=password")
sc = SparkContext(conf=conf)
spark = SparkSession(sc)
# parameters: papermill overrides end_time here when this cell is tagged "parameters"
start_time = "2018-07-10 00:00:00.000"
end_time = "2018-07-31 23:00:00.000"

# we should really exclude the XpocData property
df = DevicePropertyQuery \
    .builder(spark) \
    .system("CMW") \
    .startTime(start_time) \
    .endTime(end_time) \
    .entity() \
    .deviceLike("MKD.UA63.SCSS.AB1") \
    .propertyLike("XpocData") \
    .buildDataset() \
    .select('acqStamp') \
    .toPandas()
print(len(df))
# set up the LCG view with NXCALS and the Hadoop configuration for the NXCALS cluster
LCG_VIEW=/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt
TRUST_STORE=/tmp/truststore$RANDOM.jks
source $LCG_VIEW/setup.sh
source /cvmfs/sft.cern.ch/lcg/etc/hadoop-confext/hadoop-swan-setconf.sh hadoop-nxcals
export PYTHONPATH=$PYTHONPATH:.
# build a private trust store containing the CERN certification authorities
keytool -import -alias cerngridCA -file /etc/pki/tls/certs/CERN_Grid_Certification_Authority.crt -keystore $TRUST_STORE -storepass 'password' -noprompt
keytool -import -alias cernRootCA2 -file /etc/pki/tls/certs/CERN_Root_Certification_Authority_2.crt -keystore $TRUST_STORE -storepass 'password' -noprompt
# start an interactive pyspark shell on YARN with the NXCALS and Jackson jars on the classpath
pyspark --master yarn \
--jars $(ls $LCG_VIEW/nxcals/nxcals_java/* | xargs | sed -e 's/ /,/g') \
--conf spark.executorEnv.PYTHONPATH=$PYTHONPATH \
--conf spark.executor.extraClassPath=$LCG_VIEW/nxcals/nxcals_java/jackson-annotations-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-core-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-core-asl-1.9.13.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-databind-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-dataformat-yaml-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-datatype-jdk8-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-datatype-jsr310-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-jaxrs-1.9.13.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-mapper-asl-1.9.13.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-module-parameter-names-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-module-paranamer-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-module-scala_2.11-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-xc-1.9.13.jar \
--conf spark.driver.extraClassPath=$LCG_VIEW/nxcals/nxcals_java/jackson-annotations-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-core-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-core-asl-1.9.13.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-databind-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-dataformat-yaml-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-datatype-jdk8-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-datatype-jsr310-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-jaxrs-1.9.13.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-mapper-asl-1.9.13.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-module-parameter-names-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-module-paranamer-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-module-scala_2.11-2.10.0.jar:$LCG_VIEW/nxcals/nxcals_java/jackson-xc-1.9.13.jar \
--conf spark.driver.extraJavaOptions="-Dservice.url=https://cs-ccr-nxcals6.cern.ch:19093,https://cs-ccr-nxcals7.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19093 -Djavax.net.ssl.trustStore=$TRUST_STORE -Djavax.net.ssl.trustStorePassword=password"
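The same environment also serves the non-interactive path: instead of the pyspark shell, the papermill driver from this commit can be started in the prepared shell (driver name assumed as above):
python execute_notebook.py monitor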