Initial commit

8370f743 · Rui Wang · 8370f743 · 8370f743 · 8370f743 · 8370f743
Commit 8370f743 authored Mar 29, 2023 by Rui Wang
--- a/.asetup.save
+++ b/.asetup.save
+#Release cmake
+
+# Locale is forced to plain "C" for everything set up below.
+export LANG="C"
+export LC_ALL="C"
+export COOL_ORA_ENABLE_ADAPTIVE_OPT="Y"
+export ASETUP_PRINTLEVEL="0"
+# The same platform tag is exported under both variable names.
+export BINARY_TAG="x86_64-centos7-gcc11-opt"
+export CMTCONFIG="x86_64-centos7-gcc11-opt"
+# Private scratch directory; it is prepended to PATH near the end of this file.
+# NOTE(review): mktemp fails when /tmp/elmsheus does not exist, which leaves
+# ASETUP_SYSBIN empty -- confirm the directory exists on the target host.
+export ASETUP_SYSBIN=`mktemp -d /tmp/elmsheus/.asetup-sysbin-XXXXXX_$$`
+# NOTE(review): presumably provides the alias_sys_exe* helpers called later in
+# this file -- confirm. $AtlasSetup must already be set at this point; its
+# fallback value is only assigned further down.
+source $AtlasSetup/scripts/sys_exe-alias.sh ''
+# Respect a caller-supplied MAKEFLAGS, adding the -l16 load limit only when the
+# value does not already contain " -l"; otherwise default to -j16 -l16.
+if [ -n "${MAKEFLAGS:+x}" ]; then
+    asetup_flags=`echo ${MAKEFLAGS} | \grep ' -l'`
+    if [ -z "${asetup_flags}" ]; then
+        export MAKEFLAGS="${MAKEFLAGS} -l16"
+    fi
+else
+    export MAKEFLAGS="-j16 -l16"
+fi
+# Source the LCG gcc 11.2.0 setup script, then export the compilers found on PATH.
+source /cvmfs/sft.cern.ch/lcg/releases/gcc/11.2.0-8a51a/x86_64-centos7/setup.sh
+# Each variable is unset again when `which` finds nothing, so no empty
+# CC/CXX/FC value stays exported.
+export CC=`\env which gcc 2>/dev/null`
+[[ -z $CC ]] && unset CC
+export CXX=`\env which g++ 2>/dev/null`
+[[ -z $CXX ]] && unset CXX
+export FC=`\env which gfortran 2>/dev/null`
+[[ -z $FC ]] && unset FC
+export CMAKE_NO_VERBOSE="1"
+# Source the ATLAS local setup only when `lsetup` is not already available.
+type lsetup >/dev/null 2>/dev/null
+if [ $? -ne 0 ]; then
+   source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh --quiet
+fi
+# Request cmake 3.21.3 through the local package setup script.
+source $ATLAS_LOCAL_ROOT_BASE/packageSetups/localSetup.sh --quiet "cmake 3.21.3"
+# Fall back to the pinned AtlasSetup V02-00-44 when none is set in the environment.
+if [ -z "${AtlasSetup:+x}" ]; then
+    export AtlasSetup="/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase/x86_64/AtlasSetup/V02-00-44/AtlasSetup"
+    export AtlasSetupVersion="AtlasSetup-02-00-44"
+fi
+# Frontier server/proxy list and conditions/database locations.
+export FRONTIER_SERVER="(serverurl=http://atlasfrontier-local.cern.ch:8000/atlr)(serverurl=http://atlasfrontier-ai.cern.ch:8000/atlr)(serverurl=http://ccfrontier.in2p3.fr:23128/ccin2p3-AtlasFrontier)(proxyurl=http://ca-proxy-atlas.cern.ch:3128)(proxyurl=http://ca-proxy-meyrin.cern.ch:3128)(proxyurl=http://ca-proxy.cern.ch:3128)(proxyurl=http://atlasbpfrontier.cern.ch:3127)(proxyurl=http://atlasbpfrontier.fnal.gov:3127)"
+export ATLAS_POOLCOND_PATH="/cvmfs/atlas-condb.cern.ch/repo/conditions"
+export ATLAS_DB_AREA="/cvmfs/atlas.cern.ch/repo/sw/database"
+export DBRELEASE_OVERRIDE="current"
+# Release selection: Athena 23.0.3 from the 2022-07-07T2101 nightly of the
+# master branch; all paths below point into that nightly area on CVMFS.
+export SITEROOT="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt"
+export AtlasBaseDir="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/2022-07-07T2101"
+export LCG_RELEASE_BASE="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/sw/lcg/releases"
+export AtlasBuildStamp="2022-07-07T2101"
+export AtlasReleaseType="nightly"
+export AtlasBuildBranch="master"
+export AtlasProject="Athena"
+export TDAQ_RELEASE_BASE="/cvmfs/atlas.cern.ch/repo/sw/tdaq"
+export ATLAS_RELEASE_BASE="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/2022-07-07T2101"
+export ATLAS_RELEASEDATA="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/atlas/offline/ReleaseData"
+export AtlasArea="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/2022-07-07T2101/Athena/23.0.3"
+export G4PATH="/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/Geant4"
+export AtlasVersion="23.0.3"
+# Source the release's own setup script; a failure is reported but execution
+# of this file continues.
+source /cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/2022-07-07T2101/Athena/23.0.3/InstallArea/x86_64-centos7-gcc11-opt/setup.sh
+asetup_status=$?
+if [ ${asetup_status} -ne 0 ]; then
+    \echo "AtlasSetup(ERROR): sourcing release setup script (/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/2022-07-07T2101/Athena/23.0.3/InstallArea/x86_64-centos7-gcc11-opt/setup.sh) failed"
+fi
+export TestArea="/afs/cern.ch/work/e/elmsheus/testarea/tensorflow-test"
+# NOTE(review): alias_sys_exe / alias_sys_exe_envU are not defined in this file;
+# presumably they come from sys_exe-alias.sh sourced earlier -- confirm.
+alias_sys_exe emacs
+# git gets the alias_sys_exe_envU treatment only when LD_LIBRARY_PATH contains
+# an LCG curl directory.
+echo $LD_LIBRARY_PATH | egrep "LCG_[^/:]*/curl/" >/dev/null
+if [ $? -eq 0 ]; then
+    alias_sys_exe_envU git
+fi
+# Probe `expr`; if the arithmetic test fails, announce and apply the workaround.
+\expr 1 \* 1 + 1 >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+    echo -e '\nMaking workaround-alias for expr on this *OLD* machine'; alias_sys_exe expr
+fi
+# Put the scratch directory created earlier first on PATH.
+export PATH="${ASETUP_SYSBIN}:${PATH}"
+
+# Check the completeness in the nightly release
+# Every LD_LIBRARY_PATH entry located under ${AtlasBaseDir} must be an existing
+# directory; otherwise it is reported and the file stops with `return 1`
+# (`return` at top level only works when this file is sourced).
+for onepath in $(echo $LD_LIBRARY_PATH | tr ":" "\n"); do
+   if [[ $onepath == ${AtlasBaseDir}* && ! -d $onepath ]]; then
+      echo \!\! FATAL \!\! $onepath does not exist
+      return 1
+   fi
+done
+
+#Release Summary as follows:
+#Release base=/cvmfs/atlas-nightlies.cern.ch/repo/sw/master_Athena_x86_64-centos7-gcc11-opt/2022-07-07T2101
+#Release project=Athena
+#Release releaseNum=23.0.3
+#Release asconfig=x86_64-centos7-gcc11-opt
+
+# Execute user-specified epilog
+# A non-zero status from the epilog is reported but does not stop execution.
+
+source /cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase/swConfig/asetup/asetupEpilog.sh
+script_status=$?
+if [ ${script_status} -ne 0 ]; then
+    \echo "AtlasSetup(ERROR): User-specified epilog (source /cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase/swConfig/asetup/asetupEpilog.sh) reported failure (error ${script_status})"
+fi
--- a/run.sh
+++ b/run.sh
+#!/bin/bash
+
+`which nvidia-smi` 
+nvidia-smi 
+
+export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase;
+source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh;
+# asetup Athena,master,latest
+
+# source /cvmfs/sft.cern.ch/lcg/releases/LCG_101/tensorflow/2.5.0/x86_64-centos7-gcc11-opt/tensorflow-env.sh
+
+#lsetup "lcgenv -p LCG_101 x86_64-centos7-gcc11-opt tensorflow"
+#lsetup "lcgenv -p LCG_101_ATLAS_4 x86_64-centos7-gcc11-opt cuda"
+# lsetup "lcgenv -p LCG_101_ATLAS_4 x86_64-centos7-gcc11-opt cudnn"
+lsetup "views LCG_102b_cuda x86_64-centos7-gcc8-opt" prmon
+
+echo $LD_LIBRARY_PATH
+
+prmon -i 1 -- python3 testtf.py
+exeStat=$?
+# exit $exeStat
--- a/setup.sh
+++ b/setup.sh
+export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
+alias setupATLAS='source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh'
+export ALRB_localConfigDir=$HOME/localConfig
+setupATLAS
+lsetup panda rucio emi prmon
+voms-proxy-init --valid 96:00 #--voms atlas:/atlas/Role=production
--- a/sub.sh
+++ b/sub.sh
+#!/bin/bash
+#SBATCH -A m2616_g
+#SBATCH -C gpu
+#SBATCH --qos=debug
+#SBATCH --time 00:10:00
+#SBATCH --module=cvmfs
+##SBATCH -L SCRATCH,project
+#SBATCH -N 1
+#SBATCH -G 2
+#SBATCH -c 1
+
+# to run: sbatch sub.sh
+. ./run.sh # standelone
+# workerID=9999
+# nNode=1
+# export PANDA_HOME=/global/cfs/cdirs/m2616/harvester-perlmutter
+# export PANDA_QUEUE=NERSC_Perlmutter_GPU
+# export HARVESTER_DIR=$PANDA_HOME  # PANDA_HOME is defined in etc/sysconfig/panda_harvester
+# export HARVESTER_WORKER_ID=${workerID}
+# export HARVESTER_ACCESS_POINT=/pscratch/sd/r/rwang/tensorflow-test/workdir/${workerID}
+# export HARVESTER_NNODE=${nNode}
+# export HARVESTER_NTASKS=$((1 * ${nNode}))
+# export HARVESTER_MAPTYPE=ManyToOne
+
+
+# export wrapper_wrapper_file=$HARVESTER_DIR/etc/panda/wrapper-wrapper-3.sh
+
+# echo [$SECONDS] "Copy $wrapper_wrapper_file into $HARVESTER_ACCESS_POINT"
+# mkdir -p ${HARVESTER_ACCESS_POINT}
+# cd ${HARVESTER_ACCESS_POINT}
+# cp -v $wrapper_wrapper_file ./
+# /bin/bash ./wrapper-wrapper-3.sh $PANDA_QUEUE $HARVESTER_ACCESS_POINT
--- a/submit.sh
+++ b/submit.sh
+#!/bin/bash
+
+#prun --exec="./run.sh" --outDS user.elmsheus.gputest.0003 --outputs=out.txt --site ANALY_MANC_GPU_TEST --disableAutoRetry --architecture nvidia-gpu --noBuild --container centos7
+
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0032 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --site GOOGLE_GPU --cmtConfig="centos7"
+
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0109 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_OU_OSCER_GPU_TEST
+
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0300 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_BNL_GPU_ARC
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0301 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_INFN-T1_GPU
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0302 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_MANC_GPU
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0303 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_MWT2_GPU
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0304 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_OU_OSCER_GPU_TEST
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0305 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_QMUL_GPU
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0306 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site ANALY_SLAC_GPU
+#prun --exec="./run.sh" --outDS user.elmsheus.tensorflow.0307 --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site DESY-HH_GPU
+# prun --exec="./run.sh" --outDS user.rwang.tensorflow.`uuidgen` --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site NERSC_Perlmutter_GPU --nCore=1  --voms atlas:/atlas/Role=production
+# prun --exec=". ./run.sh" --outDS user.rwang.tensorflow.`uuidgen` --disableAutoRetry --noBuild --extFile=run.sh,testtf.py --cmtConfig="centos7" --site NERSC_Perlmutter_GPU --nCore=2 --destSE MWT2_DATADISK
+
+## single node ##
+prun --exec="./run.sh" --inDS hc_test:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.evgen.EVNT.e3601_tid04972714_00  --nFiles 1 --outDS user.$USER.tensorflow.`uuidgen` --outputs my_model.h5 --disableAutoRetry --noBuild  --extFile=*.log,*.h5,prmon*  --site NERSC_Perlmutter_GPU_Test --nCore=4 --architecture nvidia # --prodSourceLabel test
+
+## multi node ##
+# aprun -n 2 -d 8 --exec="./run.sh" --outDS user.rwang.tensorflow.`uuidgen` --outputs my_model.h5 --disableAutoRetry --noBuild  --extFile=*.log,*.h5,prmon*  --site NERSC_Perlmutter_GPU_Test --nCore=7 --architecture nvidia # 
+
+#ANALY_BNL_GPU_ARC : test
+#ANALY_INFN-T1_GPU : brokeroff
+#ANALY_MANC_GPU : online
+#ANALY_OU_OSCER_GPU_TEST : test
+#ANALY_QMUL_GPU : test
+#ANALY_SLAC_GPU : online
+#GOOGLE_GPU : offline
+
+
+# --cmtConfig="centos7"
+# --architecture "&nvidia-gpu"
+# ANALY_MANC_GPU
+# GOOGLE_GPU
+
+#--cmtConfig nvidia-gpu@centos7
+
+#--cmtConfig nvidia-gpu
--- a/testtf.py
+++ b/testtf.py
+#!/usr/env/python
+
+import tensorflow as tf
+import keras
+import numpy as np
+import os, sys
+
+def main():
+
+  print("TensorFlow version:", tf.__version__)
+
+  tf.debugging.set_log_device_placement(True)
+
+  ngpu = len(tf.config.list_physical_devices('GPU'))
+  print("Num GPUs Available: ", ngpu )
+
+  if ngpu:
+    print('GPU found')
+  else:
+    print("No GPU found")
+    sys.exit(2)
+
+  # mnist = tf.keras.datasets.mnist
+
+  # (x_train, y_train), (x_test, y_test) = mnist.load_data()
+  origin_folder = 'http://storage.googleapis.com/tensorflow/tf-keras-datasets/'
+  path = keras.utils.data_utils.get_file(
+      'mnist.npz',
+      origin=origin_folder + 'mnist.npz',
+      file_hash=
+      '731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1')
+
+  with np.load(path, allow_pickle=True) as f:  # pylint: disable=unexpected-keyword-arg
+    x_train, y_train = f['x_train'], f['y_train']
+    x_test, y_test = f['x_test'], f['y_test']
+  
+  x_train, x_test = x_train / 255.0, x_test / 255.0
+
+  model = tf.keras.models.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dropout(0.2),
+    tf.keras.layers.Dense(10)
+  ])
+
+  predictions = model(x_train[:1]).numpy()
+  print(predictions)
+
+  tf.nn.softmax(predictions).numpy()
+
+  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+  loss_fn(y_train[:1], predictions).numpy()
+
+  model.compile(optimizer='adam',
+                loss=loss_fn,
+                metrics=['accuracy'])
+
+  model.fit(x_train, y_train, epochs=5)
+
+  model.evaluate(x_test,  y_test, verbose=2)
+
+  probability_model = tf.keras.Sequential([
+    model,
+    tf.keras.layers.Softmax()
+  ])
+
+  print (probability_model(x_test[:5]))
+  model.save("my_model.h5")
+
+if __name__ == "__main__":
+  main()