diff --git a/Control/AthenaMP/CMakeLists.txt b/Control/AthenaMP/CMakeLists.txt
index 34973fe9d020ddb4aeec74da72e6bb1ad876a6ad..7872826067a75fff878895e4d525ef87b73369c4 100644
--- a/Control/AthenaMP/CMakeLists.txt
+++ b/Control/AthenaMP/CMakeLists.txt
@@ -19,6 +19,5 @@ atlas_add_executable( getSharedMemory src/memory-profiler/getPss.cc )
 
 # Install files from the package:
-atlas_install_python_modules( python/*.py python/tests/*.py )
-atlas_install_joboptions( share/AthenaMP_EventService.py )
+atlas_install_python_modules( python/*.py )
diff --git a/Control/AthenaMP/share/AthenaMP_EventService.py b/Control/AthenaMP/python/EventService.py
similarity index 85%
rename from Control/AthenaMP/share/AthenaMP_EventService.py
rename to Control/AthenaMP/python/EventService.py
index cd497963582467655e3f3ce974a76892c32c6eb8..c9628cda6f731ac99605465145f487c71b74efe8 100644
--- a/Control/AthenaMP/share/AthenaMP_EventService.py
+++ b/Control/AthenaMP/python/EventService.py
@@ -1,3 +1,5 @@
+# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+
 # Configure AthenaMP Strategy
 from AthenaMP.AthenaMPFlags import jobproperties as jps
 jps.AthenaMPFlags.Strategy="EventService"
diff --git a/Control/AthenaMP/python/IoUtils.py b/Control/AthenaMP/python/IoUtils.py
deleted file mode 100644
index 91714fb0d2732f5b728a4b2dca34142a9fa9fef4..0000000000000000000000000000000000000000
--- a/Control/AthenaMP/python/IoUtils.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
-
-# @file AthenaMP.IoUtils
-# @purpose I/O utils
-# @author Mous Tatarkhanov <tmmous@berkeley.edu>
-
-__doc__ = "I/O related utils for AthenaMP"
-__version__ = "$Revision: 1.0$"
-__author__ = "Mous Tatarkhanov <tmmous@berkeley.edu>"
-
-from AthenaCommon.Logging import log as msg #logging handle
-from GaudiMP.IoRegistry import IoRegistry
-import six
-_debug = msg.debug
-_info = msg.info
-
-
-def create_worker_dir(top_wkdir):
-    """create tmp wkdir for forked worker under top_wkdir"""
-    #changing the workdir to tmp location
-    import os
-    from os.path import abspath as _abspath, join as _join
-    ppid = os.getppid()
-    pid = os.getpid()
-    curdir = _abspath (os.curdir)
-    wkdir = _abspath (_join (top_wkdir,str(pid)))
-    _info("curdir=[%s]", curdir)
-    _info("top_wkdir=[%s]", top_wkdir)
-    _info("wkdir=[%s]", wkdir)
-    if os.path.exists(wkdir):
-        import shutil
-        shutil.rmtree (wkdir)
-    os.makedirs (wkdir)
-
-    return wkdir
-
-def update_io_registry(wkdir, mpid, iocomp_types=None):
-    """helper method to correctly update the IoRegistry instances
-    """
-    import os
-    from os.path import join as _join
-    from os.path import basename as _basename
-    from os.path import isabs as _isabs
-
-    from PyUtils.PoolFile import PoolFileCatalog
-
-    # ioreg is a dict:
-    # {'iocomp-name' : { 'old-fname' : ['iomode', 'new-fname'] }, ... }
-    ioreg = IoRegistry.instances
-    msg.debug("ioreg::: %s" % ioreg)
-
-    pfc = PoolFileCatalog()
-
-    ioreg_items = six.iteritems (IoRegistry.instances)
-    for iocomp,iodata in ioreg_items:
-        #print "--iocomp,len(iodata)",iocomp, len(iodata)
-        io_items = six.iteritems(iodata)
-        for ioname,ioval in io_items:
-            # handle logical filenames...
-            #ioname=pfc(ioname)
-            pfc_name = pfc(ioname)
-            if (pfc_name != ioname):
-                ioreg[iocomp][ioname][1]=pfc_name
-
-            ##print " --iocomp,ioname,ioval",iocomp,ioname,ioval
-            iomode,newname = ioval[0], ioval[1] or ioname
-            if iomode == '<output>':
-                newname = _join (wkdir,
-                                 "mpid_%s__%s"%(str(mpid).zfill(3),
-                                                _basename(ioname)))
-                msg.debug ("update_io_registry:<output>: newname=%s" % newname)
-            elif iomode == '<input>':
-                if not _isabs(ioname) and not ioname.startswith("root:") and not ioname.startswith("rfio"):
-                    # FIXME: handle URLs/URIs...
-                    src = os.path.abspath(_join(os.curdir, ioname))
-                    dst = _join(wkdir, ioname)
-                    os.symlink(src, dst)
-                    msg.debug( "update_io_registry:<input> created symlink %s for" % dst)
-            else:
-                raise ValueError ("unexpected iomode value: %r"%iomode)
-            ioreg[iocomp][ioname][1] = newname
-            pass
-        pass
-    msg.debug( "IoRegistry.instances=%s" % IoRegistry.instances )
-    return # update_io_registry
-
-def redirect_log(wkdir):
-    """redirect stdout and stderr of forked worker to tmp wkdir"""
-    import os, sys
-    import multiprocess as mp
-    # define stdout and stderr names
-
-    stdout = os.path.join(wkdir, 'stdout')
-    stderr = os.path.join(wkdir, 'stderr')
-    _info(" stdout->[%s]" % stdout)
-    _info(" stderr->[%s]" % stderr)
-
-    # synch-ing log
-    map(lambda x: x.flush(), (sys.stdout, sys.stderr))
-
-    flags = os.O_CREAT | os.O_WRONLY
-    fdout = os.open (stdout, flags)
-    assert fdout>=0, \
-        "problem child [%r] opening stdout" % mp.current_process()
-    fileno = sys.stdout.fileno()
-    os.close (fileno)
-    os.dup2 (fdout, fileno)
-
-    fderr = os.open (stderr, flags)
-    assert fderr>=0, \
-        "problem child [%r] opening stderr" % mp.current_process()
-    fileno = sys.stderr.fileno()
-    os.close (fileno)
-    os.dup2 (fderr, fileno)
-
-def reopen_fds(wkdir=""):
-    """redirect workers fds by setting proper flags
-    <INPUT> -> duplicate fd in RDONLY mode
-    <OUTPUT> -> redirect fd to local output-copy w/ duplicate flags.
-    """
-    _info("reopen_fds: in dir [%s]" % wkdir)
-    import os, fcntl
-    #from IoRegistry import IoRegistry as ioreg
-    _join = os.path.join
-
-    _fds = IoRegistry.fds_dict
-    _fds.create_symlinks(wkdir)
-
-    for k, v in six.iteritems(_fds):
-        fd = k;
-        (real_name, iomode, flags) = v
-        if not os.path.isfile (real_name):
-            _debug("reopen_fds: %s is not file" % real_name)
-            continue
-
-        if (iomode == '<OUTPUT>'):
-            #expect the copy of <OUTPUT> to be in curdir
-            _debug("reopen_fds: <OUTPUT> %s " % real_name)
-            pos = os.lseek(fd, 0, os.SEEK_CUR)
-            #PC HACK remove!
-            try:
-                new_fd = os.open (_join(wkdir, os.path.basename(real_name)), flags)
-                os.lseek(new_fd, pos, os.SEEK_SET)
-            except Exception as err:
-                msg.warning("Exception caught handling OUTPUT file %s: %s" % (real_name, err) )
-                msg.warning(" ...ignoring file FIXME!")
-                continue
-        else:
-            _debug("reopen_fds:<INPUT> %s" % real_name)
-            new_fd = os.open (real_name, os.O_RDONLY)
-
-        os.close (fd)
-        os.dup2 (new_fd, fd)
-        fcntl.fcntl (fd, fcntl.F_SETFL, flags)
-        os.close (new_fd)
-
-def reopen_fds_old():
-    import os, fcntl
-    _realpath = os.path.realpath
-    _join = os.path.join
-    # reopen file descriptors
-    procfd = '/proc/self/fd'
-    fds = os.listdir(procfd)
-    for i in fds:
-        fd = int(i)
-        real_name = _realpath(_join(procfd,i))
-        if not os.path.exists (real_name):
-            continue
-        flags = fcntl.fcntl (fd, fcntl.F_GETFL)
-        new_fd = os.open (real_name, os.O_RDONLY)
-        os.close (fd)
-        os.dup2 (new_fd, fd)
-        fcntl.fcntl (fd, fcntl.F_SETFL, flags)
-        os.close (new_fd)
-
-
-def print_fds(msg):
-    """print all file descriptors of current process"""
-    import os, fcntl
-    _realpath = os.path.realpath
-    _join = os.path.join
-    # print out file descriptors
-    procfd = '/proc/self/fd'
-    fds = os.listdir(procfd)
-    for fd in fds:
-        i = int(fd)
-        realname = _realpath(_join(procfd,fd))
-        msg.info("fd=[%i], realname=[%s] exists=[%s]",
-                 i, realname, os.path.exists(realname))
-    return
-
diff --git a/Control/AthenaMP/python/MpProcessing.py b/Control/AthenaMP/python/MpProcessing.py
deleted file mode 100755
index 5cb7c0bc7d4cb68f271f61d67d2baf177a8679c3..0000000000000000000000000000000000000000
--- a/Control/AthenaMP/python/MpProcessing.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-
-import multiprocessing, os, sys, types
-dlflags = sys.getdlopenflags()
-sys.setdlopenflags( 0x100 | 0x2 ) # RTLD_GLOBAL | RTLD_NOW
-import _athenamp as amp
-sys.setdlopenflags( dlflags )
-
-__all__ = [ 'cpu_count' ]
-
-
-# cpu_count is pure python (but does call sysconf for Linux)
-cpu_count = multiprocessing.cpu_count
-
-# the following sets are replacements, which are accessed on C++ through the
-# _athenamp extesion module for now
-
-# current_process is used for identification purposes in multiprocessing; it
-# serves no real purpose in AthenaMP (it's only used for printing a message)
-# since there is a single relation of a mother process with multiple children
-
-def current_process():
-    '''
-    Return process object representing the current process
-    '''
-    return amp.Process( os.getpid() )
-
-# active_children does not match exactly, but since AthenaMP only starts
-# readers/workers/writers from the mother, an aggregate of all groups will
-# do; note also that b/c of the AthenaMP model, no cleanup is needed (as is
-# in multiprocessing: Pools could close and re-open there)
-
-_process_groups = []
-def active_children():
-    """
-    Return list of process objects corresponding to live child processes
-    """
-    ac = list()
-
-    global _process_groups
-    for g in _process_groups:
-        ac += g._children()
-    return ac
-
-
-##### class Queue
-Queue = amp.SharedQueue
-
-
-##### class Pool
-class MapResults( object ):
-    def __init__( self, group ):
-        self._group = group
-
-    def get( self, *args, **kw ):
-        status = self._group.wait()
-
-        # there are two parts to the exit code: the reported result from the worker
-        # function and the process' exit code
-
-        # TODO: handle process' exit code in a cleaner way (fix in PyComps?), as this
-        # obviously does not work in general ...
-
-        result = []
-        for proc_result in status:
-            # The result from run_worker_queue is extremely circumspect: it can either
-            # contain a single tuple of 5 entries, 1 tuple of 4 entries, or 2 tuples of
-            # 4 entries. It is checked on being 'OK' on the last entry of each tuple.
-            # By sheer coincidence, that happens to work.
-
-            # Now, the tuple can not be edited, so it's turned into a list, which takes
-            # care of the spurious arrangement of lengths. Then for all possible returns,
-            # the third entry is always theApp._exitstate and may need modification.
-
-            # General exceptions are eaten, b/c this is so PyComps specific. :P
-            r = proc_result[ 2 ] # the python-posted result
-            try:
-                import types
-                if type(r[0]) == types.TupleType: # early return case
-                    z = [ list(y) for y in r ]
-                    for l2 in z:
-                        if l2[2] == 0: l2[2] = proc_result[1]
-                else:
-                    z = [ [ list(y) for y in x ] for x in r ] # late return case
-                    for l1 in z:
-                        for l2 in l1:
-                            if l2[2] == 0: l2[2] = proc_result[1]
-            except Exception:
-                # make-believe? can happen e.g. on early exit() or e.g. SIGQUIT
-                r = [[(proc_result[0], -1, proc_result[1], 'ERR')]]
-            result.append( r )
-        return result
-
-class Pool( object ):
-    packaged_count = 0
-
-    def __init__( self, processes = None, initializer = None, initargs = (),
-                  maxtasksperchild = None ):
-
-        if not callable( initializer ):
-            raise TypeError( 'initializer must be a callable' )
-
-        # this workaround is needed b/c initargs can (and do) contain an amp.SharedQueue,
-        # which can not be marshalled, but COW will deal with it properly by binding it
-        # into this local 'packaged_initializer'
-        def packaged_initializer( initializer = initializer, initargs = initargs ):
-            return initializer( *initargs )
-
-        self.packaged_count += 1
-        self._initializer = '_amp_pool_%s_%d' % (initializer.__name__,self.packaged_count)
-
-        import __main__
-        setattr( __main__, self._initializer, packaged_initializer )
-
-        self._group = amp.ProcessGroup( processes )
-        global _process_groups
-        _process_groups.append( self._group )
-
-    def map_async( self, func, iterable, chunksize=1 ):
-        # NOTE: initializer results are ignored (same as in multiprocessing)
-        self._group.map_async( self._initializer )
-
-        # TODO: hand over iterable properly (it just so happens that in AthenaMP, it is
-        # a repeated list of MaxEvent, for use of reading from the queue)
-        self._group.map_async( '%s.%s' % (func.__module__,func.__name__), iterable[0] )
-        return MapResults( self._group )
-
-    def close( self ):
-        self._group.map_async( 'exit' )
-
-        global _process_groups
-        _process_groups.remove( self._group )
-
-    def join( self ):
-        pass # alternative functionality for now
-
-# other features of multiprocessing are not used by AthenaMP (but might be
-# used by others; the following facade forwards for now
-
-class ModuleFacade( types.ModuleType ):
-    def __init__( self, module, name ):
-        types.ModuleType.__init__( self, name )
-        self.__dict__[ 'module' ] = module
-
-        import multiprocessing
-        self.__dict__[ 'pmp' ] = multiprocessing
-
-    def __getattr__( self, attr ):
-        try:
-            return getattr( self.module, attr )
-        except AttributeError:
-            pass
-
-        import AthenaCommon.Logging, logging
-        log = logging.getLogger( 'AthenaMP.MpProcessing' )
-        log.error( 'missing attribute %s (falling back on multiprocessing)', attr )
-        return getattr( self.pmp, attr )
-
-
-sys.modules[ __name__ ] = ModuleFacade( sys.modules[ __name__ ], __name__ )
-del ModuleFacade
diff --git a/Control/AthenaMP/python/Utils.py b/Control/AthenaMP/python/Utils.py
deleted file mode 100644
index 954ab638aa2eff6b4c6daff18d3e1c9cb5cca74c..0000000000000000000000000000000000000000
--- a/Control/AthenaMP/python/Utils.py
+++ /dev/null
@@ -1,184 +0,0 @@
-# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
-
-# @file: AthenaMP.Utils
-# @purpose: a set of tools to handle various aspects of AthenaMP
-# @author Sebastien Binet <binet@cern.ch>
-# @coauthor: Mous Tatarkhanov <tmmous@berkeley.edu>
-
-__doc__ = "Misc. utils for AthenaMP"
-__version__ = "$Revision: 1.1 $"
-__author__ = "Sebastien Binet <binet@cern.ch>"
-
-import os
-
-from future import standard_library
-standard_library.install_aliases()
-import subprocess
-
-#-----Helper tools for AthenaMP--#
-
-def get_mp_root(msg=""):
-    tmp_root=os.getenv("ATHENA_MP_TMPDIR")
-    if msg == "":
-        from AthenaCommon.Logging import log as msg
-    if tmp_root is None:
-        import tempfile
-        if tempfile.tempdir is None:
-            tmp_root = "/tmp"
-        else:
-            tmp_root = tempfile.tempdir
-    else:
-        msg.debug("Using ATHENA_MP_TMPDIR environment variable to set athena-mp dir")
-    username = "MP"
-    if os.getenv("LOGNAME") != None :
-        username = os.getenv("LOGNAME")
-    elif os.getenv("USER") != None :
-        username = os.getenv("USER")
-    return os.sep.join([tmp_root,"athena-mp-tmp-%s" % username])
-
-def hack_copy(srcDir, destDir):
-    """ explicitly copy files not captured by IoRegistry"""
-    #couple of sanity checks
-    if srcDir == '':
-        srcDir = os.curdir
-    if srcDir == destDir:
-        from AthenaCommon.Logging import log as msg
-        msg.warning("hack_copy called with srcDir = destDir = "+srcDir)
-        return
-
-    #A hack for RDO reconstruction after nextEvent(n)
-    filepattern_list = [ 'DigitParams.db', 'SimParams.db',
-                         '*.pmon.stream' ]
-
-    import fnmatch
-    import shutil
-    import os
-    for pattern in filepattern_list:
-        for f in os.listdir(srcDir):
-            if fnmatch.fnmatch(f, pattern):
-                import os.path
-                src_path = os.path.join(srcDir,f)
-                dest_path = os.path.join(destDir,f)
-                if not os.path.isfile(dest_path):
-                    shutil.copyfile(src_path, dest_path)
-
-def slice_it(iterable, cols=2):
-    from itertools import islice
-    start = 0
-    chunksz,extra = divmod (len(iterable), cols)
-    if extra:
-        chunksz += 1
-    for i in range(cols):
-        yield islice (iterable, start, start+chunksz)
-        start += chunksz
-
-#-----memory status tools---#
-import sys
-if 'linux' in sys.platform:
-    def mem_status(msg):
-        """memory usage information: shared/private"""
-        for line in open('/proc/self/status'):
-            if line.startswith('Vm'):
-                msg.debug (line.strip())
-        private,shared=_get_mem_stats()
-        msg.info ("===> private: %s MB | shared: %s MB",
-                  private/1024.,
-                  shared /1024.)
-else:
-    def mem_status(msg):
-        """memory usage information: dummy for non-linux"""
-        return
-
-PAGESIZE=os.sysconf("SC_PAGE_SIZE")/1024 #KiB
-have_pss=0
-def _get_mem_stats(pid='self'):
-    """parses /proc/self/statm and smaps for memory usage info"""
-    global have_pss
-    private_lines=[]
-    shared_lines=[]
-    pss_lines=[]
-    statm_name = "/proc/%s/statm" % pid
-    smaps_name = "/proc/%s/smaps" % pid
-    rss=int(open(statm_name).readline().split()[1])*PAGESIZE
-    if os.path.exists(smaps_name): #stat
-        for line in open(smaps_name).readlines(): #open
-            if line.startswith("Shared"):
-                shared_lines.append(line)
-            elif line.startswith("Private"):
-                private_lines.append(line)
-            elif line.startswith("Pss"):
-                have_pss=1
-                pss_lines.append(line)
-        shared=sum([int(line.split()[1]) for line in shared_lines])
-        private=sum([int(line.split()[1]) for line in private_lines])
-        #Note shared + private = rss above
-        #The rss in smaps includes video card mem etc.
-        if have_pss:
-            pss_adjust=0.5 #add 0.5KiB as this average error due to trunctation
-            Pss=sum([float(line.split()[1])+pss_adjust for line in pss_lines])
-            shared = Pss - private
-        #elif (2,6,1) <= kv <= (2,6,9):
-        #    shared=0 #lots of overestimation, but what can we do?
-        #    private = rss
-    else:
-        shared=int(open(statm_name).readline().split()[2])
-        shared*=PAGESIZE
-        private = rss - shared
-    return (private, shared)
-
-
-#---- CPU-Proc affinty setting tools---#
-if 'linux' in sys.platform:
-    def get_cpu(pid):
-        """get core nbr where the proc-pid resides at that moment"""
-        cmd = "ps --pid %i -o psr" % pid
-        #print ">%s" % cmd
-        out = subprocess.getoutput(cmd)
-        cpu = int(out.splitlines()[1].split()[0])
-        #print "pid: [%i] has cpu: [%i]" % (pid, cpu)
-        return cpu
-else:
-    def get_cpu(pid):
-        """dummy on non-linux"""
-        return 0
-
-def set_proc_affinity(pid, cpu):
-    """set pid to cpu affinity for process"""
-    cmd = "taskset -pc %i %i" % (cpu, pid)
-    #print "> taskset -pc %i %i" % (cpu, pid)
-    st,out = subprocess.getstatusoutput(cmd)
-    return st
-
-
-#---- AthenaMP profiling related tools---#
-time_list = list()
-time_list2 = list()
-
-def watch(msg=None, message=""):
-    import time
-    """Timer (elap, user, system, child) with time-interval-reports into msg stream"""
-    global time_list, time_list2
-    time_list.append(os.times())
-    time_list2.append(time.time())
-
-    if msg is not None:
-        (utime, stime, cutime, cstime, etime) = dt();
-        elap_time = "%s_ELAP_TIME=%.4f seconds" % (message, etime)
-        user_time = "%s_USER_TIME=%.2f" % (message, utime)
-        system_time = "%s_SYSTEM_TIME=%.2f" % (message, stime)
-        child_utime = "%s_CHILD_UTIME=%.2f" % (message, cutime)
-        child_stime = "%s_CHILD_STIME=%.2f" % (message, cstime)
-        msg.info(elap_time)
-        msg.debug("%s %s" % (user_time, system_time) )
-        msg.debug("%s %s" % (child_utime, child_stime) )
-    return len(time_list)
-
-def dt(n=-1):
-    import operator
-    return tuple( map(operator.sub, time_list[n], time_list[n-1]) )
-
-def dt2(n=-1):
-    return time_list2[n] - time_list2[n-1]
-
-
-
diff --git a/Control/AthenaMP/python/tests/mjMonTools.py b/Control/AthenaMP/python/tests/mjMonTools.py
deleted file mode 100644
index 9db47f643db44bbf35e85e879ec83219ab1c9ef7..0000000000000000000000000000000000000000
--- a/Control/AthenaMP/python/tests/mjMonTools.py
+++ /dev/null
@@ -1,2252 +0,0 @@
-# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
-
-# @file: mpMonTools.py
-# @purpose: Library for mp performance monitoring of AthenaMP
-# @author: Mous Tatarkhanov <tmmous@cern.ch>
-# @date: December 2009
-
-from __future__ import print_function
-
-__version__ = "$Revision: 276791 $"
-__author__ = "Mous Tatarkhanov <tmmous@cern.ch>"
-
-import sys
-import os
-import signal
-import time
-import array
-import copy
-
-from future import standard_library
-standard_library.install_aliases()
-import subprocess
-
-
-TIME_STEP = 10
-KB = (1 << 10)
-MB = (1 << 20)
-GB = (1 << 30)
-
-
-PAGESIZE=os.sysconf("SC_PAGE_SIZE")/1024 #KiB
-have_pss=0
-
-mp_stat = dict() #dictionary which will hold all statistics: ProcDict objects, io-,mem-,cpu-dicts
-pid_list = list() #book-keeping of all spawned and launched processes for later clean-up
-init_mem = list() #get_memstat()
-init_numa = list() #get_numastat()
-
-T0 = time.time()
-numa_T0 = T0
-
-def init_mp_stat():
-    global mp_stat
-    global pid_list
-    global T0
-    global init_mem
-    global init_numa
-    global numa_T0
-
-    del mp_stat #delete data from previous np
-    del pid_list #delete data from previous np
-
-    #brand new mp_stat
-    mp_stat = dict()
-    pid_list = list()
-    mp_stat['io'] = dict()
-    mp_stat['mem'] = dict()
-    mp_stat['cpu'] = dict()
-    #mp_stat['cpid'] = dict()
-    mp_stat['pid'] = dict()
-    mp_stat['numa'] = dict()
-    mp_stat['numa']['Time'] = list()
-    mp_stat['numa']['numa_hit'] = list()
-    mp_stat['numa']['numa_miss'] = list()
-    mp_stat['numa']['numa_foreign'] = list()
-    mp_stat['numa']['interleave_hit'] = list()
-    mp_stat['numa']['local_node'] = list()
-    mp_stat['numa']['other_node'] = list()
-
-    T0 = time.time()
-    init_mem = get_memstat()
-    init_numa = list(get_numastat())
-    init_numa[0] = numa_T0
-
-    print ("initial_mem=%s" % init_mem)
-    print ("initial_numa=%s" % list(init_numa))
-
-
-class ProcDict(dict):
-    """this is to store the process information"""
-    key_list = ['state', 'vmem', 'rss', 'sz', 'cpu_time', 'elap_time', 'private', 'shared']
-    pid = -1
-    start_time = -1
-    init_time = -1
-    out=""
-    err=""
-    elap_time = -1
-    cpu_time = -1
-    vmem = -1
-    rss = -1
-    nbr_rows = -1 # the number of rows
-
-    def __init__(self, pid=-1, start_time=-1, out="", err="", child = False):
-        self.pid = pid
-        self.start_time = start_time
-        #self['np']=np
-        self.out = out
-        self.err = err
-
-        for key in self.key_list:
-            self[key] = list()
-        self.proc_ps_stat()
-        if child:
-            mp_stat["cpid"][self.pid] = self
-        else:
-            mp_stat["pid"][self.pid] = self
-
-    def add_ps_line(self, line):
-        ps_str = line.split()
-        if self.start_time is -1:
-            self.start_time = _seconds(ps_str[5])
-        self['state'].append( ps_str[1] )
-        self['vmem'].append( int(ps_str[2])/1024 )
-        self['rss'].append( int(ps_str[3])/1024 )
-        self['sz'].append( int(ps_str[4])/1024 )
-        self['cpu_time'].append( _seconds(ps_str[6]) )
-        self['elap_time'].append( _seconds(ps_str[7]) )
-
-        if self.init_time==-1:
-            global T0
-            if grepExist(self.out, "'start processing'"):
-                self.init_time = time.time()- T0
-                print ("pid-%i: init_time=%s"% (self.pid, self.init_time))
-
-        private = shared = -1
-        try:
-            pass
-            #private, shared = _get_shared_private_mem(self.pid)
-        except Exception as e:
-            print ("## Caught exception [%s] !!" % str(e.__class__))
-            print ("## What:", e)
-            print (sys.exc_info()[0])
-            print (sys.exc_info()[1])
-        self["private"].append(private)
-        self["shared"].append(shared)
-
-    def proc_ps_stat(self):
-        """ ps statistics for this process of pid """
-        out = subprocess.getoutput("ps --pid %i -o pid,state,vsize,rss,sz,start,cputime,etime" % self.pid)
-        lines = out.splitlines()
-        if len(lines) > 1:
-            self.add_ps_line(lines[1])
-        else:
-            print ("there is no process with pid: [%i]", self.pid)
-            return False
-        return True
-
-    def children_exist(self):
-        """ figures out weather the np kids were spawned for mother mpid"""
-        sc, out = subprocess.getstatusoutput("ps --ppid %i -o pid,start" % self.pid)
-        if sc is not 0:
-            #print (" children_exist: Error, sc=%i" % sc)
-            return False
-
-        ps_lines = out.splitlines()
-        nc = len(ps_lines)-1
-        print (" children_exist().nbr of children = %i" % nc)
-        if nc > 0 :
-            print ("%i children workers exist. Creating ProcDicts..." % nc)
-            ps_lines.pop(0)
-            for line in ps_lines:
-                ps_str = line.split()
-                cpid = int(ps_str[0])
-                ProcDict(cpid, start_time = _seconds(ps_str[1]))
-                print ("..... child [%i] added" % cpid)
-            return nc
-        else:
-            #print ("no children exist for parent: %s " % self.pid)
-            return False
-
-
-    def trim_stat(self):
-        """trim finished process information at the end of statistics """
-        while True:
-            if (self['state'][-1] in ['S', 'Z', 'T', 'D', 'X']) or (self['vmem'][-1] is 0) or (self['rss'][-1] is 0): #STOPPED, ZOMBIE, STOPPED2, Dddd, Xxxx
-                for key in self.key_list:
-                    self[key].pop()
-            else:
-                break
-
-    def summarize(self):
-        """finalize summary information."""
-        self.elap_time = self['elap_time'][-1]
-        self.cpu_time = self['cpu_time'][-1]
-        self.vmem = self['vmem'][-1]
-        self.rss = self['rss'][-1]
-        self.nbr_rows = len(self['elap_time'])
-
-class SPSummary(dict):
-    """ Sub Processes Summary - convenient way of presenting to ROOT or other output methods"""
-    cpid_list=list()
-    np = 1
-    total_cpu_time = 0
-    total_cpu_time_x = 0
-    total_elap_time = 0
-    total_elap_time_x = 0
-
-    def __init__(self, np):
-        #self['np']=list()
-        self.np = np
-        self['pid']=list()
-        #self['elap_time']=list()
-        self['elap_time_x']=list()
-        #self['cpu_time']=list()
-        #self['user_time']=list()
-        #self['system_time']=list()
-        #self['bootstrap_time']=list()
-        self['cpu_time_x']=list()
-        self['vmem']=list()
-        self['rss']=list()
-        self['free_mem_spike']=list()
-        self['init_time_x'] = list()
-
-    def extract_summary(self, dir):
-        self.spid_list = mp_stat["pid"].keys()
-        for pid in mp_stat["pid"].keys():
-            print ("extract_summary: pid %i" % pid)
-            self['pid'].append(pid)
-            self['init_time_x'].append(mp_stat['pid'][pid].init_time)
-            self['elap_time_x'].append(mp_stat['pid'][pid].elap_time)
-            self['cpu_time_x'].append(mp_stat['pid'][pid].cpu_time)
-            self['vmem'].append(mp_stat['pid'][pid].vmem)
-            self['rss'].append(mp_stat['pid'][pid].rss)
-            self["free_mem_spike"].append( get_spike(sumList(mp_stat['mem']['kbmemfree'],
-                                                             sumList(mp_stat['mem']['kbbuffers'],
-                                                                     mp_stat['mem']['kbcached']) )))
-        for pid in self.cpid_list:
-            print (" %s/%s exists ->" % (dir,pid), os.path.exists(os.path.join(dir,"%s" % pid))) #FIX: add the extraction from cpid's logs.
-            out_path = os.path.join(dir, 'stdout')
-            err_path = os.path.join(dir, 'stderr')
-
-            #e_time = int(grepValue(out_path, "WORKER_ELAP_TIME"));
-            #u_time = int(grepValue(out_path, "WORKER_USER_TIME"));
-            #s_time = int(grepValue(out_path, "WORKER_SYSTEM_TIME"));
-            #c_time = u_time + s_time
-
-            #self['elap_time'].append(e_time); self.total_elap_time += e_time;
-            #self['user_time'].append(u_time)
-            #self['system_time'].append(s_time)
-            #self['cpu_time'].append(c_time);
-            #self.total_cpu_time += c_time;
-            #b_time = int(grepValue(out_path, "BOOTSTRAP_ELAP_TIME"));
-            #self['bootstrap_time'].append(b_time);
-
-
-class MPSummary(dict):
-    """ Mother Process Summary - convenient way of presenting data to ROOT..."""
-    mpid = dict()
-    np = 1
-    def __init__(self, np):
-        mpid = mp_stat['pid']
-        self.np = np
-        #self['np'] = list()
-        #self['np'] = [ np, ]
-        self["m_par_time"] = list()
-        self["m_firstevent_time"]= list()
-        self["m_cpu_time"]= list()
-        self["m_elap_time"]= list()
-        self["x_init_time"]= list()#externally observed time
-        self["x_par_time"]= list() #externally observed time
-        self["x_fin_time"]= list() #externally observed time
-        self["free_mem_spike"]= list() #externally observed value
-        self["swap_change"] = list()
-
-    def extract_summary(self, log):
-        self["m_par_time"] = [0,]# int(grepValue(log, "PARALLEL_ELAP_TIME")),]
-        self['m_merging_time'] = [0,]# int(grepValue(log, "MERGING_ELAP_TIME")),]
-        self["m_firstevent_time"] = [0,]# int(grepValue(log, "FIRSTEVENT_ELAP_TIME")),]
-        self["m_cpu_time"] = [ mp_stat["pid"].cpu_time, ] #mp_stat["mpid"]["cpu_time"][-1]
-        self["m_elap_time"]= [ mp_stat["pid"].elap_time, ] #mp_stat["mpid"]["elap_time"][-1]
-        self["free_mem_spike"]= [ get_spike(sumList(mp_stat['mem']['kbmemfree'],
-                                                    sumList(mp_stat['mem']['kbbuffers'], mp_stat['mem']['kbcached']))
-                                            ), ]
-        self["swap_change"] = [ get_spike(mp_stat['mem']['kbswpfree']), ]
-
-
-
-
-####### useful auxillary functions ##########################
-def _seconds(time_str): #handles time in "H:M:S" and "M:S" format
-    time_nums = time_str.split(":")
-    if (len(time_nums)==3):
-        return 3600*int(time_nums[0])+60*int(time_nums[1]) + int(time_nums[2])
-    elif (len(time_nums)==2):
-        return 60*int(time_nums[0]) + int(time_nums[1])
-    print ("ERROR: _seconds() returning - 0")
-    return 0
-
-def show_numactl():
-    sc,out=subprocess.getstatusoutput("numactl --show")
-    if sc==256:
-        print ("mjMonTools.show_numactl: numastat is not working! zeroes will be returned")
-        return False
-    else:
-        print ("mjMonTools.show_numactl: \n %s" % out)
-        return True
-
-def get_numastat():
-    sc,out=subprocess.getstatusoutput("numastat")
-    if sc==256:
-        print ("mjMonTools.get_numastat: numastat is not working! zeroes will be returned")
-        return (0,0,0,0,0,0,0)
-    else:
-        lines = out.splitlines()
-        return (time.time(),
-                int(lines[1].split()[1]),
-                int(lines[2].split()[1]),
-                int(lines[3].split()[1]),
-                int(lines[4].split()[1]),
-                int(lines[5].split()[1]),
-                int(lines[6].split()[1])
-                )
-
-def save_numastat():
-    current_numa = get_numastat()
-    #print ("current_numa=%s" % list(current_numa))
-
-    _numa_stat = (
-        mp_stat['numa']['Time'],
-        mp_stat['numa']['numa_hit'],
-        mp_stat['numa']['numa_miss'],
-        mp_stat['numa']['numa_foreign'],
-        mp_stat['numa']['interleave_hit'],
-        mp_stat['numa']['local_node'],
-        mp_stat['numa']['other_node']
-        )
-
-    change_numa = subList(current_numa,init_numa)
-    print ("NUMA_CHANGE=%s" % change_numa)
-    return [_numa_stat[i].append(change_numa[i]) for i in range(len(change_numa))]
-
-def print_memstat(msg =""):
-    mem = get_memstat()
-    t = time.time() - T0;
-    save_numastat()
-    print (msg + " [T=%i sec]" % t + " USED[%i Mb][change: %i Mb] - FREE[%i Mb][change: %i Mb]" % (
-        mem["USED"], mem["USED"]-init_mem["USED"], mem["FREE"], mem["FREE"]-init_mem["FREE"]))
-
-def get_memstat():
-    out=subprocess.getoutput("free -m")
-    mem = dict()
-    lines = out.splitlines()
-    mem_strs = lines[1].split()
-    mem['used'] = int(mem_strs[2])
-    mem['free'] = int(mem_strs[3])
-    mem['cached'] = int(mem_strs[5])
-    mem['buffers'] = int(mem_strs[6])
-    mem_strs = lines[2].split()
-    mem['USED'] = int(mem_strs[2])
-    mem['FREE'] = int(mem_strs[3])
-    #print ("mem: [%s Mbs]" % mem)
-    return mem
-
-init_mem = get_memstat()
-
-def meanList(num_list):
-    """finds average value of the number list"""
-    if len(num_list) == 0:
-        print ("meanList: WARNING - empty list, returning 0.0")
-        return 0.0
-    return float(sum(num_list)) / len(num_list)
-
-def sumList(l1, l2):
-    """sum up values of two lists l1 + l2"""
-    if len(l1) is not len(l2):
-        print ("sumList: WARNING: len(l1) not equals len(l2)")
-        n = len(l1) if len(l2) > len(l1) else len(l2)
-    else:
-        n = len(l1)
-
-    sum = list()
-    for i in range(n):
-        sum.append(l1[i] + l2[i])
-    return sum
-
-def subList(l1, l2):
-    """subtract values of two lists: l1 - l2"""
-    if len(l1) is not len(l2):
-        print ("subList: WARNING: len(l1) not equals len(l2)")
-        n = len(l1) if len(l2) > len(l1) else len(l2)
-    else:
-        n = len(l1)
-
-    sub = list()
-    for i in range(n):
-        sub.append(l1[i] - l2[i])
-    return sub
-
-def get_spike(l):
-    #print (" get_spike:", end='')
-    #print (" e0 = ", l[0]/1024, "Mb", end='')
-    #print (" eN = ", l[-1]/1024, "Mb", end='')
-    #print (" max = ", max(l)/1024, "Mb", end='')
-    #print (" min = ", min(l)/1024, "Mb", end='')
-    #print (" e0 - eN = ", (l[0] - l[-1])/1024, "Mb", end='')
-    #print (" e0 - min = ", (l[0] - min(l))/1024, "Mb", end='')
-    #print (" eN - min = ", (l[-1] - min(l))/1024, "Mb", end='')
-    #print (" return max - min =", (max(l) - min(l))/1024, "Mb")
-    return max(l) - min(l)
-
-def prepare_mp_stat():
-    """ this function construct "cp" dict from "cpid" dict for ROOT formation convenience
-    converting: cpid:pid:vmem,rss,cpu-time,... -> cp:pid,vmem,rss,cputime,...
""" - mp_stat['sp'] = mp_stat['pid'] - del mp_stat['pid'] - mp_stat['pid']=dict() - mp_stat['pid']['pid'] = list() - - for key in ProcDict.key_list: - mp_stat['pid'][key]=list() - - for pid in mp_stat['sp'].keys(): - mp_stat['pid']['pid'] += [pid,] * mp_stat['sp'][pid].nbr_rows - for key in ProcDict.key_list: - mp_stat['pid'][key] += mp_stat['sp'][pid][key] - del mp_stat['sp'] - - -def print_summary(): - print ("===== SUB PROCESS SUMMARY =====") - for (k, v) in mp_stat['sp_summary'].items(): - print ("sp_summary['%s']=%s " % (k, v)) - -################## children tools ###################### -def launched_processes_working(ppid): - """ ps statistics for children of ppid. returns False if no children exist """ - out = subprocess.getoutput("ps --ppid %i -o pid,state,vsize,rss,sz,start,cputime,etime" % ppid) - ps_lines = out.splitlines() - ps_lines.pop(0) - - exist = False # switch for existance of launched processes (not any processes) - - if len(ps_lines) > 0: - print ("Subprocesses exist:") - for line in ps_lines: - ps_str = line.split() - pid = int(ps_str[0]) - #print ("subprocess pid=%i" % pid) - if pid in mp_stat["pid"].keys(): - exist = True - mp_stat["pid"][pid].add_ps_line(line) - print ("pid-%i: ps-stat appended" % pid) - else: - print ("pid-%i: secondary proc" % pid) - return exist - else: - print ("No subprocesses exist for parent: %i" % ppid) - return exist #False - return exist #False - - -def summarize_children_stat(): - """trim finished worker information at the end of statistics """ - for pid in mp_stat["cpid"].keys(): - mp_stat['cpid'][pid].trim_stat() - mp_stat['cpid'][pid].summarize() - -def summarize_proc_stat(): - """summarize proc finished worker information at the end of statistics """ - #mp_stat['mpid'].summarize() - for pid in mp_stat['pid'].keys(): - #mp_stat['pid'][pid].trim_stat() - mp_stat['pid'][pid].summarize() - -def children_born(log, mpid, np): - """ figures out weather the np kids were spawned for mother mpid""" - sc,out = subprocess.getstatusoutput("ps --ppid %i -o pid,start" % mpid) - if sc is not 0: - print (" mpMonTools.children_born: no kids yet... Error, sc=%i" % sc) - return False - - ps_lines = out.splitlines() - #print ("ps_lines=", ps_lines) - nc = len(ps_lines)-1 - - print (" children_exist: nbr of children = [%i]" % nc) - if grepValue(log, "FIRSTEVENT_ELAP_TIME") is None: - return False - - if nc==np : #nbr of children is equal to nbr of procs required - print ("%i children workers forked! Registering them (creating ProcDicts) ..." % np) - ps_lines.pop(0) - for line in ps_lines: - ps_str = line.split() - pid = int(ps_str[0]) - ProcDict(pid, start_time = _seconds(ps_str[1])) - print ("..... 
child [%i] added" % pid) - return True - else: - print ("no children exist for parent: %s " % mpid) - return False - - -################ LOG VARIABLES PROCESSING: Grepping ################## -################ LOG VARIABLES PROCESSING: Grepping ################## -def grepExist(log, field): - """grep check for the existance of the unique field in the log - """ - print ("grep %s %s" % (field, log),) - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc==256: - print (" FALSE: sc=%i" % sc) - return False - line = out.splitlines()[0] - print (" TRUE: sc=%i \n grepped-line=%s" % (sc,line)) - return True - -def grepExist2(log, field): - """grep check for the existance of the unique field in the log - """ - print ("grep %s %s" % (field, log)) - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc!=0: - print ("grepping %s in %s failed with sc=%i" % (field, log, sc) ) - return False - line = out.splitlines()[0] - print ("grepped-line=%s" % line) - print ("sc=", sc) - - return True - -def grepValue(log, field, sep='='): - """grep unique field in the log and find corresponding value by regexp - Example: out = 'Py:EventLoopMgr INFO EvtMax = 123456 something' - grepValue(log, "EvtMax", sep="=") = '123456' - """ - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc!=0: - #print ("grepping %s in %s failed" % (field, log)) - return None - line = out.splitlines()[0] - import re - vexpr = '\\s*'+ sep+ '\\s*(\\d+)' - m = re.search( field + vexpr, line) - value = m.group(1) - return value - -def grepPath(log, field, sep=':'): - """grep unique field in the log and find corresponding value by regexp - Example: out = 'Py:EventLoopMgr INFO master workdir: /tmp/athena-mp-tmp-tmmous/22590-1261097934 smthng' - grepPath(log, "workdir", sep=":") = '/tmp/athena-mp-tmp-tmmous/22590-1261097934' - """ - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc!=0: - print ("grepping %s in %s failed" % (field, log)) - return None - line = out.splitlines()[0] - import re - vexpr = '\\s*'+ sep+ '\\s*([^\\s]+)' - m = re.search( field + vexpr, line) - path = m.group(1) - return path - - -############# related to athena-mp ######################### -def launch_athena(jobo, ne, se, np, output_dir, numa_set=None): - """launching cmd: athena.py -c EvtMax=$ne $jobo 1> mp.output/stdout_$jobo.$np.$ne 2> mp.output/stderr_$jobo.$np.$ne""" - - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - - numa_args = list() - - print ("job command and options as template: %s" % jobo) - from string import Template - arg_template= Template(jobo) - arg_str = arg_template.substitute(MAXEVT=ne, SKIPEVT=se) - proc_args = arg_str.split(); - job = proc_args[0] - - stdout_name = os.path.join(output_dir, "stdout.%s.%i.%i" % (job,np,ne)) - stderr_name = os.path.join(output_dir, "stderr.%s.%i.%i" % (job,np,ne)) - STDOUT_FILE = open(stdout_name , "w") - STDERR_FILE = open(stderr_name, "w") - - #proc_args = ["athena.py", "-c", "EvtMax=%i; SkipEvents=%i" % (ne, se) , "../%s" % jobo] - print ("job command and options after template processing: %s" % proc_args) - - if numa_set != None: - numa_args = [ "numactl"] - if (numa_set[0] == 'f' and numa_set[1] == 'f'): - numa_args = list() - elif ( numa_set[0]=='f' and numa_set[1]!='f'): - numa_args.append( "--membind=%i" % numa_set[1]) - elif ( numa_set[0]!='f' and numa_set[1]=='f'): - numa_args.append( "--cpubind=%i" % numa_set[0]) - elif (numa_set[0]!='f' and numa_set[1]!='f'): - numa_args += ["--membind=%s" % numa_set[0], 
"--cpubind=%s" % numa_set[1] ] - else: - print ("SOMETHING WRONG: numa_set=%s" % numa_set) - - - #proc_args = [ "numactl", - # "--cpubind=%i" % numa_set[0], - # "--membind=%i" % numa_set[1], - # "athena.py", "-c", "EvtMax=%i; SkipEvents=%i" % (ne, se) , "../%s" % jobo] - - proc_args = numa_args + proc_args - - print ("<<<LAUNCH>>>: %s" % proc_args) - mproc = subprocess.Popen( proc_args, - stdout=STDOUT_FILE, - stderr=STDERR_FILE, - cwd = output_dir, - shell=False, - close_fds = True) - pid_list.append(mproc.pid) - STDOUT_FILE.close(); STDERR_FILE.close() - proc = ProcDict(mproc.pid, out=stdout_name, err = stderr_name, child=False) - - return (mproc, stdout_name, stderr_name) - -############# TERMINATOR AREA: stopping, killing, terminating processes ############### - -def stop_proc(proc): - """ terminate/kill a process by either proc_object or pid""" - pid = 0 - try: - if type(proc) is int: - pid = proc - os.kill(pid, signal.SIGKILL); #os.waitpid(pid, 0); - else: - pid = proc.pid - if proc.poll() is None: os.kill(pid, signal.SIGKILL); - proc.wait(); - print ("process %s terminated" % pid ) - except Exception as e: - print ("## Caught exception [%s] !!" % str(e.__class__)," ## What:",e) - print (sys.exc_info()[0], sys.exc_info()[1]) - return False - pid_list.remove(pid) - return True - -def stop_proc_tree(pid): - """ Terminate/kill recursively process tree by pid. Be precautious using this!""" - out = subprocess.getoutput("ps --ppid %i" % pid) - lines = out.splitlines(); lines.pop(0) #remove header - try: - if len(lines) > 0: - for line in lines: - cpid = int(line.split()[0]) - print ("child [%i:%i] being terminated..." % (pid, cpid)) - stop_proc_tree(cpid) - if pid in pid_list: pid_list.remove(pid) - os.kill(pid, signal.SIGKILL); #os.waitpid(pid, 0); - print ("[%i] - terminated." % pid ) - except Exception as e: - print ("[%i] - dead #while killing caught exception [%s] !!" % (pid, str(e.__class__))," ## What:",e) - #print (sys.exc_info()[0], sys.exc_info()[1]) - return False - return True - - -def stop_athenaMP(mproc): - """ terminate/kill a process by either proc_object or pid""" - try: - pid = 0 - if type(mproc) is int: - pid = mproc - stop_proc_tree(pid) # killing athena-mp mproc-tree - else: - pid = mproc.pid; - if mproc.poll() is None: os.kill(pid, signal.SIGKILL); - mproc.wait(); - print ("process %s terminated" % pid ) - return True - except Exception as e: - print ("## Caught exception [%s] !!" % str(e.__class__)," ## What:",e) - print (sys.exc_info()[0], sys.exc_info()[1]) - return (False) - return False - - - - -############# sar related wrappers ########################### - -def launch_sar(log, time_step): - """ - launch sar with - `sar -bBcdqrRuvwWy -I SUM -I XALL -n ALL -P ALL` = `sar -A` - """ - sar_args = [ "sar", "-bBrvwu", "-o", log, "%i" % time_step, "0" ] - print ("launching: %s %s %s %s %s %s" % tuple(sar_args) ) - sc,out = subprocess.getstatusoutput('sar -b 1 1') - if sc!=0: - print ('launching failed - sar do not work on this system - please install if available!') - return None - FNULL = open('/dev/null', 'w') - proc = subprocess.Popen(sar_args, - executable="sar", - stdout = FNULL, - stderr = subprocess.STDOUT, - shell=False, - close_fds = True) - FNULL.close() - pid_list.append(proc.pid) - return proc - -def _num(str): - """try converting str into int or float if fails return the same string""" - try: - if "." 
in str: - out = float(str) - else: - out = int(str) - except ValueError: - return str - return out - -def get_sar_stat(log, key): - """ get statistics by issueing this cmd: `sar -key $log`""" - print ('launching cmd: sar %s -f %s' % (key, log) ) - sc,out = subprocess.getstatusoutput("sar %s -f %s" % (key,log) ) - if sc!=0: - print ("launching failed - either file %s does not exist or sar does not work on this system - please check!" % log) - return None - sar_dict = dict() - #print("##################################"); print ("out=\n", out; print "################################################") - - lines = out.splitlines() - print ("trim1=", lines.pop(0))#trimming output - print ("trim2=", lines.pop(0))#trimming output - - avg_line = lines.pop(); #trimming avg line at the end - print ("avg_line1=", avg_line) - - hstrs = lines.pop(0).replace('%', 'p').replace('/', 'p').split() #trimming header strings and replacing '%' and '/' to satisfy ROOT - hstrs[0] = "Time" - print ("Sar statistics fields found: ", hstrs) - - #print"(##################################"; print "lines=\n", lines; print "################################################") - - for hstr in hstrs: - sar_dict[hstr] = list() - for line in lines: - lstrs = line.split() - #print ("lstrs=", lstrs) - for i,hstr in enumerate(hstrs): - if i!=0: - sar_dict[hstr].append( _num(lstrs[i]) ) - else: - sar_dict[hstr].append(_seconds(lstrs[i])) #time conversion from "H:M:S" --> numero segundos - return sar_dict - - -### Tools for process statistics ####### - -def get_full_sar_stat(log): - mp_stat["io"] = get_sar_stat(log, "-b") - mp_stat["mem"] = get_sar_stat(log, "-r") - mp_stat["cpu"] = get_sar_stat(log, "-u") - #return mp_stat - - - ##############sysstat and other linux commands wrappers######## - -def _meminfo(): - out=subprocess.getoutput("cat /proc/meminfo") - lines = out.splitlines() - mem=dict() - Kb = 1024 - mem['total']= int(lines[0].split()[1]) / Kb - mem['free'] = int(lines[1].split()[1]) / Kb - mem['buffers']= int(lines[2].split()[1]) / Kb - mem['cached'] = int(lines[3].split()[1]) / Kb - print ("meminfo.real_total: [%i Mb]", mem['total'] ) - print ("meminfo.free: [%i Mb]", mem['free']) - print ("meminfo.cached: [%i Mb]", mem['cached'] ) - print ("meminfo.buffers: [%i Mb]", mem['buffers']) - return mem - -def _get_iostat(): - out=subprocess.getoutput("iostat") - io = dict() - lines = out.splitlines() - strs = lines[1].split() - io['used'] = int(strs[2]) - mem=dict() - mem['free'] = int(strs[3]) - mem['cached'] = int(strs[5]) - mem['buffers'] = int(strs[6]) - mem_strs = lines[2].split() - mem['USED'] = int(strs[2]) - mem['FREE'] = int(strs[3]) - #print ("mem: [%s Mbs]" % mem) - return io -def _used_mem(): - out=subprocess.getoutput("free -m") - mem_strs = out.splitlines()[2].split() - used_mem = int(mem_strs[2]) - print ("used_mem: [%i Mb]" % used_mem) - return used_mem -def _free_mem(): - out=subprocess.getoutput("free -m") - mem_strs = out.splitlines()[2].split() - free_mem = int(mem_strs[3]) - print ("free_mem: [%i Mb]" % free_mem) - return free_mem - -def _launch_iostat(log, time_step): - print ('launching cmd: iostat $TIME_STEP -d -x > iostat.$jobo.$np.$ne &') - sc,out = subprocess.getstatusoutput( "iostat" ) - if sc!=0: - print ('launching failed - iostat do not work on this system') - return None - f_iostat = open(log, "w") - iostat_proc = subprocess.Popen( - [ "iostat", "%i" % time_step, "-d", "-x"], - executable="iostat", - stdout = f_iostat, - shell=False, - close_fds = True) - - f_iostat.close() - return 
iostat_proc -def _launch_vmstat(log, time_step): - print ('launching cmd: vmstat $TIME_STEP -n > vmstat.$jobo.$np.$ne &' ) - sc,out = subprocess.getstatusoutput( "vmstat -V" ) - if sc!=0: - print ('launching failed - vmstat do not work on this system') - return None - file = open(log, "w") - proc = subprocess.Popen([ "vmstat", "%i" % time_step, "-n" ], - executable="vmstat", - stdout = file, - shell=False, - close_fds = True) - file.close() - return proc -def __create_childProcDicts(ppid): - """ creates stats dictionary with """ - out = subprocess.getoutput("ps --ppid %i -o pid, start" % ppid) - ps_lines = out.splitlines() - ps_lines.pop(0) - - if len(ps_lines) > 1: - - for line in ps_lines: - ps_str = line.split() - pid = int(ps_str[0]) - ProcDict(pid, start_time = _seconds(ps_str[1])) - print ("ppid: [%i]: child [%i] added" % (ppid, pid)) - else: - print ("no children exist for parent: %s " % ppid) - - -####### adopted from AthenaMP/PyComps ################### -def print_shared_private(pid): - print ("CPROC-SHARED_PRIVATE_MEM for pid: [%i]" % pid) - for line in open("/proc/%i/status" % pid): - if line.startswith('Vm'): - print(line.strip()) - private,shared=_get_shared_private_mem() - print ("pid:[%i] ===> private: %s MB | shared: %s MB" % (pid, private/1024., shared /1024.)) -def _get_shared_private_mem(pid='self'): - """ Finds proc's shared and private memory size from /proc/pid/statm and /proc/pid/smaps dir - Coppied from AthenaMP/PyComps.py""" - global have_pss - private_lines=[] - shared_lines=[] - pss_lines=[] - statm_name = "/proc/%s/statm" % pid - smaps_name = "/proc/%s/smaps" % pid - rss=int(open(statm_name).readline().split()[1])*PAGESIZE - if os.path.exists(smaps_name): #stat - for line in open(smaps_name).readlines(): #open - if line.startswith("Shared"): - shared_lines.append(line) - elif line.startswith("Private"): - private_lines.append(line) - elif line.startswith("Pss"): - have_pss=1 - pss_lines.append(line) - shared=sum([int(line.split()[1]) for line in shared_lines]) - private=sum([int(line.split()[1]) for line in private_lines]) - #Note shared + private = rss above - #The rss in smaps includes video card mem etc. - if have_pss: - pss_adjust=0.5 #add 0.5KiB as this average error due to trunctation - Pss=sum([float(line.split()[1])+pss_adjust for line in pss_lines]) - shared = Pss - private - #elif (2,6,1) <= kv <= (2,6,9): - # shared=0 #lots of overestimation, but what can we do? - # private = rss - else: - shared=int(open(statm_name).readline().split()[2]) - shared*=PAGESIZE - private = rss - shared - return (private, shared) - - -################################################# -############# ROOT Output ####################### - -def _createRootFile(outName): - """creating carcasus of report ROOT file""" - print ("create ROOT file...") - from PerfMonAna.PyRootLib import importRoot - from ROOT import TTree - import array - ROOT = importRoot( batch = True ) - outFile = ROOT.fopen( outName, 'RECREATE' ) - outFile.cd("/") - - i = array.array( 'i', [0] ) - d = array.array( 'f', [0.] 
) - - outFile.cd() - tree = TTree( "io", "IO statistics tree") - tree.Branch('Time', i, 'int/I') - tree.Branch('tps',d,'float/D') #transfers per second - tree.Branch('rtps',d,'float/D') #read transfers per second - tree.Branch('wtps',d,'float/D') #write transfers per second - tree.Branch('breadps',d,'float/D') #blocks read per second - tree.Branch('bwrtnps',d,'float/D') #blocks written per second - tree.Write(); - - outFile.cd() - tree = TTree( "mem", "Mem statistics tree") - tree.Branch('Time', i, 'int/I') - tree.Branch('kbmemfree', i, 'int/I') #free in kB - tree.Branch('kbmemused', i, 'int/I') #used in kB - tree.Branch('pmemused', d, 'float/D') #used in kB - tree.Branch('kbbuffers', i, 'int/I') #buffers in kB - tree.Branch('kbcached', i, 'int/I') #cached in kB - tree.Branch('kbswpfree', i, 'int/I') #swap free in kB - tree.Branch('kbswpused', i, 'int/I') #swap used in kB - tree.Branch('pswpused', d, 'float/D') - tree.Branch('kbswpcad', i, 'int/I') - tree.Write(); - - outFile.cd() - tree = TTree("cpu", "CPU statistics tree") - tree.Branch('Time', i, 'int/I') - tree.Branch('CPU', i,'int/I') #CPU number or ALL - tree.Branch('puser',d,'float/D') # CPU utilization percentage at user level - tree.Branch('pnice',d,'float/D') # CPU utilization at nice level - tree.Branch('psystem',d,'float/D') # CPU utilization at system level - tree.Branch('piowait',d,'float/D') # CPU idle percentage due to IO-wait - tree.Branch('psteal',d,'float/D') # virtual processes wait percentage - tree.Branch('pidle',d,'float/D') # CPU idling due to non IO reasons - tree.Write(); - - outFile.cd() - tree = TTree( "cp", "Children processes statistics") - tree.Branch('Time', i, 'int/I') # start date/time - tree.Branch('pid', i , 'int/I') # pid of worker process - tree.Branch('state', i , 'int/I') #state of the process at the moment (important at the end of process) - tree.Branch('vmem', i, 'int/I') - tree.Branch('rss', i, 'int/I') - tree.Branch('sz', i, 'int/I') # Size in physical pages of the core image of the process. This includes text, data, and stack space. 
-    tree.Branch('shared', i, 'int/I')# shared memory as extracted from /proc/pid/smaps
-    tree.Branch('private', i, 'int/I')# private memory as extracted from /proc/pid/smaps
-    tree.Branch('cpu_time', i,'int/I')# cpu_time
-    tree.Branch('elap_time', i, 'int/I')# elapsed time (Wall Clock time)
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "cp_summary", "children processes summary tree")
-    tree.Branch('pid', i, 'int/I')
-    tree.Branch('elap_time', i , 'int/I')
-    tree.Branch('cpu_time', i, 'int/I')
-    tree.Write();
-
-    outFile.cd()
-    tree = TTree( "mpid", "Mother Process statistics tree")
-    tree.Branch('pid', i , 'int/I')
-    tree.Branch('state', i , 'int/I')
-    tree.Branch('vmem', i, 'int/I')
-    tree.Branch('rss', i, 'int/I')
-    tree.Branch('sz', i, 'int/I')
-    tree.Branch('shared', i, 'int/I')
-    tree.Branch('private', i, 'int/I')
-    tree.Branch('cpu_time', i,'int/I')
-    tree.Branch('elap_time', i, 'int/I')
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "mp_summary", "Mother process summary tree")
-    tree.Branch('pid', i, 'int/I')
-    tree.Branch('m_elap_time', i , 'int/I') #mother process total elapsed time
-    tree.Branch('m_cpu_time', i, 'int/I') #mother process CPU-time (children-worker's time not included)
-    tree.Branch('m_firstevent_time', i, 'int/I') #time spent on firstEvents(after init before forking) - measured inside mother process
-    tree.Branch('m_par_time', i, 'int/I') #parallel time - time from forking till collecting data - measured inside mother process
-    tree.Branch('x_init_time', i, 'int/I') #externally observed MP SERIAL-INIT executin time (from start till forking)
-    tree.Branch('x_par_time', i, 'int/I') #externally observed MP PARALLEL execution time (from forking till joining)
-    tree.Branch('x_fin_time', i, 'int/I') #externally observed MP FINALIZATION time (from joining till end of job)
-    tree.Branch('free_mem_spike', i, 'int/I') #spike in "free+buffers+cached" at the end of MP-process -> physical used memory released.
-    tree.Branch('swap_change', i, 'int/I') # change in swap, should be zero if not means MP causing a swapping
-    #tree.Branch('used_mem_spike', i, 'int/I')
-    tree.Branch('event_proc_rate', i, 'float/D') #event processing rate defined as ne*60*m_par_time/np = events/proces/min
-    tree.Write()
-
-    outFile.cd()
-    outFile.Write()
-    outFile.Close()
-    print ("create ROOT file... [DONE]" )
-    return
-
-def createRootFile(outName, np):
-    """creating structure of ROOT-report file from mp_stat dictionary """
-    print ("create ROOT file...")
-
-    from PerfMonAna.PyRootLib import importRoot
-    from ROOT import TTree
-    import array
-    ROOT = importRoot( batch = True )
-    outFile = ROOT.fopen( outName, 'RECREATE' )
-    print ("ROOT.fopened")
-
-    outFile.cd("/")
-
-    i = array.array( 'i', [0] )
-    d = array.array( 'f', [0.] )
-
-    for t in mp_stat.keys():
-        tree = TTree( t, "%s stat tree" % t)
-        tree.Branch('np', i, 'int/I') # each tree will have 'np' branch
-        for b in mp_stat[t].keys():
-            #print ("tree=%s, branch=%s" % (t,b))
-            if isinstance(mp_stat[t][b][0], int):
-                tree.Branch(b, i, 'int/I')
-            elif isinstance(mp_stat[t][b][0], float):
-                tree.Branch(b, d,'float/F')
-            else:
-                #print ("branch [%s] is not int or float type" % b)
-                tree.Branch(b, i, 'int/I')
-        tree.Write()
-    outFile.Write()
-    outFile.Close()
-    print ("create ROOT file... [DONE]")
-
-
-def fillRootTree(tree, stat, np):
-    #print ("writing %s statistics Tree:" % tree.GetName(),)
-    branches = stat.keys()
-    #print (" branches=", branches, "...", )
-    nbr = len(branches)
-    array_list = list()
-
-    np_array = array.array('i', [np])
-    tree.SetBranchAddress('np', np_array) #putting 'np' into each tree.
-    for branch in branches:
-        #print ("fillRT: branch=%s" % branch)
-        if isinstance(stat[branch][0], float):
-            f = stat[branch][0]
-            nums = array.array('f', [0.0])
-            array_list.append(nums)
-        elif isinstance(stat[branch][0], int):
-            i = stat[branch][0]
-            nums = array.array('i', [0])
-            array_list.append(nums)
-        else:
-            #print ("branch [%s] is not int or float type" % branch)
-            nums = array.array('i', [-1])
-            array_list.append(nums)
-        tree.SetBranchAddress(branch, array_list[-1]);
-
-    for index in range(len(stat[branches[0]])):
-        for array_index, branch in enumerate(branches):
-            #print ("stat[branch=%s][index=%i] array_index=%i " % (branch, index, array_index))
-            array_list[array_index][0] = stat[branch][index] if array_list[array_index][0] is not -1 else -1
-        tree.Fill()
-    #print ("[DONE]")
-
-
-
-def writeRootFile(outName, np):
-    """writes statistics into ROOT file"""
-    print ("write ROOT file %s...", outName )
-    createRootFile(outName, np)
-    from ROOT import TFile, TTree
-    import array
-    outFile = TFile( outName, 'update' )
-
-    stat_keys = mp_stat.keys()
-    #print ("mp_stat.keys()", stat_keys)
-    for key in stat_keys:
-        #print (" writing [%s]" % key)
-        tree = outFile.Get( "%s" % key )
-        fillRootTree(tree, mp_stat[key], np)
-        tree.Write()
-
-    outFile.Write()
-    outFile.Close()
-    print ("write ROOT file... [DONE]" )
-    return
-
-def mergeRootFiles(file, ne):
-    import glob
-    file_list = glob.glob1(os.getcwd(), "%s.*.%i.root" % (file, ne) )
-    cmd = "hadd -f6 mp_stat.%s.ne%i" % (file, ne)
-    for f in file_list:
-        cmd = cmd + ' ' + f
-    sc, out = subprocess.getstatusoutput(cmd)
-
-def mergeRootOutput(output_file, jobo, np_list, ne):
-    from ROOT import TFile, TTree
-    #output_file = "merged.%s.ne%i.root" % (jobo, ne)
-    cmd = "hadd -f6 %s" % output_file
-    for np in np_list:
-        # here we copy mp_summary and cp_summary trees in each root file from /$np dir into root dir for further merging
-        file = "mj.%s.%i.%i.root" % (jobo, np, ne)
-        print (" ---> processing file = %s" % file)
-        #here we form the command for merging
-        cmd = cmd + " %s" % file
-
-    print ("issueing root files merging command:[%s]" % cmd)
-    sc, out = subprocess.getstatusoutput(cmd)
-    return #output_file
-
-def _createGlobalRootFile(file, ne):
-    from ROOT import TFile, TTree, Tlist
-    import glob
-    file_list = glob.glob1(os.getcwd(), "%s.*.%i.root" % (file, ne) )
-    outFile = TFile ("%s.%i.root" % (file, ne), 'RECREATE' )
-    for f in file_list:
-        print ("Copying trees from [%s]" % f)
-        tf = TFile (f, 'READ' )
-        mpt = tf.Get("mp_summary")
-        cpt = tf.Get("cp_summary")
-        outFile.cd('/')
-        dir = "%s" % f.replace(file, "").split(".")[1]
-        print (" creating dir for np = %s" % dir)
-        outFile.mkdir(dir) # creating dir for np
-        outFile.cd(dir)
-        mpTree = mpt.CloneTree(); mpTree.Write()
-        cpTree = cpt.CloneTree(); cpTree.Write()
-        outFile.Write()
-        tf.Close()
-
-    outFile.cd('/')
-
-    ikeys = outFile.GetListOfKeys().MakeIterator()
-    key = ikeys.Next()
-    key_list = list()
-
-    while key is not None:
-        key_list.append(key.GetName())
-
-    for np in key_list:
-        outFile.Get("%s/mp_summary")
-
-
-def report2(root_file, ne = 0, comments=""):
-    print(' mpMonTools.report(): root_file=', root_file)
-    from ROOT import TFile, TTree, TBranch, TCanvas, TPad, TGraph, TLegend, TMultiGraph, gStyle, TLatex, TPaveLabel, TPaveText, TH2I, TMath
-
-    def getTreeList(tree, column, condition):
-        size = tree.Draw(column, condition,'goff');
-        v1 = tree.GetV1(); v1.SetSize(size)
-        return list(v1)
-
-    def makeGraph(tree, name, formula, condition="", color = 1, lineWidth=1):
-        tree.Draw(formula, condition, "goff")
-        graph = TGraph(int(tree.GetSelectedRows()), tree.GetV2(), tree.GetV1())
-        graph.SetLineColor(color);
-        graph.SetLineWidth(lineWidth)
-        graph.SetName(name);
-        return graph
-
-    def MakeMultiGraph(graph_data, mg, l):
-        clr = 1
-        gl = list()
-
-        if graph_data is None:
-            return []
-
-        if graph_data['type'] is 'graph':
-            for name, (tree, param) in graph_data['data'].items():
-                clr+=1; formula =''; condition=''
-                if type(param).__name__=='tuple':
-                    formula = param[0]
-                    condition = param[1]
-                else:
-                    print ("MakeMG: ", formula, condition)
-                    formula = param
-                    condition = ""
-
-                print ("name=%s, tree=%s, formula=%s, condition=%s" % (name, tree.GetName(), formula, condition) )
-
-
-                #g = makeGraph(tree, name, formula, condition, color=clr)
-                tree.Draw(formula, condition, "goff")
-                g = TGraph(int(tree.GetSelectedRows()), tree.GetV2(), tree.GetV1()); gl.append(g)
-
-                g.SetName(name); g.SetLineColor(clr); g.SetMarkerColor(clr); g.SetLineWidth(0)
-                if name == "worker_rate":
-                    g.SetLineColor(10)
-
-                mg.Add(g);
-                l.AddEntry(g, name)
-
-        if graph_data['type'] is 'list':
-            for name, (lx,ly) in graph_data['data'].items():
-                print ("name=%s" % name); print (lx); print (ly)
-                clr+=1
-                g = TGraph( len(lx), array.array('f', lx), array.array('f', ly) )
-                g.SetName(name); g.SetLineColor(clr); g.SetLineWidth(1); g.SetMarkerColor(clr);
-                mg.Add(g)
-                l.AddEntry(g, name)
-                gl.append(g)
-
-        if graph_data['type'] is 'array':
-            clr = 1
-            g_list = list()
-            data = graph_data['data']
-            for name,(x,y) in graph_data['data'].items():
-                print (x); print (y)
-                clr+=1;
-                g = TGraph(len(x), x, y)
-                g.SetName(name); g.SetLineColor(clr); g.SetLineWidth(1); g.SetMarkerColor(clr)
-                gl.append(g)
-                mg.Add(g);
-                l.AddEntry(g, name)
-        if graph_data['type'] is 'text':
-            title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC")
-            for s in graph_data['data']:
-                print ("graph_data['data']=%s" % s)
-                sp_pt.AddText(s)
-            sp_pt.SetTextAlign(12);
-            sp_pt.SetTextSize(0.04)
-            sp_pt.Draw()
-            return []
-
-        if graph_data['type'] is 'latex':
-            title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC")
-            tl = TLatex(); tl.SetTextSize(0.02); tl.SetTextAlign(12);
-            txtd = graph_data['data']
-            i = 0; x0 = 0.05; y0 = 0.90; dx = 0.08; dy = 0.05
-            x1 = x0
-            tl.DrawLatex(x1, y0, 'np')
-            for s in txtd['np']:
-                x1 = x1 + dx
-                tl.DrawLatex(x1, y0, s)
-            txtd.pop('np')
-
-            for k in txtd.keys():
-                y0 = y0 - dy
-                tl.DrawLatex(x0-0.03, y0, k);
-                x1 = x0
-                for s in txtd[k]:
-                    x1 = x1 + dx
-                    tl.DrawLatex(x1, y0, s)
-            return []
-        if 'goptions' in graph_data.keys():
-            mg.Draw(graph_data['goptions'])
-        else:
-            mg.Draw('ALP')
-
-        l.Draw()
-        h=mg.GetHistogram(); h.SetXTitle(graph_data['xtitle']); h.SetYTitle(graph_data['ytitle']); h.SetMinimum(0.1);
-        title.DrawPaveLabel(0.1,0.91,0.9,0.99, graph_data['title'], "brNDC")
-        if 'text' in graph_data.keys():
-            title.DrawPaveLabel(0.2,0.88,0.8,0.92, graph_data['text'], "brNDC")
-            #text_box = TPaveText(0.2,0.51,0.8,0.54); text_box.AddText(graph_data['text']);
-            #text_box.SetFillColor(42); text_box.SetTextAlign(12); text_box.SetTextfONt(40); text_box.Draw();
-
-        return [] # gl #list of TGraph
-
-    c = TCanvas("mpr", "AthenaMP-mp-scaling-charts", 10, 10, 800, 1024)
-    c.SetFillColor(17); c.SetBorderSize(1); c.cd()
-
-    tfile = TFile(root_file, "READ"); print (" root compression factor = ", tfile.GetCompressionFactor())
-    spSumTree = tfile.Get("sp_summary")
-    #cpSumTree = tfile.Get("cp_summary")
-    ioTree = tfile.Get("io")
-    cpuTree = tfile.Get("cpu")
-    numaTree = tfile.Get("numa")
-
-    if ne is 0:
-        ne = int(root_file.split('.')[-2].replace('ne', ''))
-        print ("extracted ne=[%i]" % ne)
-
-##### FORMING THE DATA FOR ROOT Graphing-Charting-Histogramming #####
-    np_list = list(set(getTreeList(spSumTree, 'np', ''))); np_list.sort() #uniqeify and sort np_list
-    elap_time_stdev = list()
-    elap_time_avg = list()
-    elap_time_max = list()
-    cpu_time_stdev = list()
-    cpu_time_max = list()
-    cpu_time_avg = list()
-
-    total_rate = list()
-    elap_time_rate = list()
-    cpu_time_rate = list()
-    user_time_rate = list()
-    par_event_rate = list()
-
-    elap_cpu_time = list() # elap - cpu time avg.
-
-    elap_time_stdev_x = list()
-    cpu_time_stdev_x = list()
-    par_elap_time = list() # elap time as seen from mother
-
-    sp_lb = [b.GetName() for b in list(spSumTree.GetListOfBranches())]
-    #sp_lb = [b.GetName() for b in list(spSumTree.GetListOfBranches())]
-
-    #mp_txt = "%s" % mp_lb + "\n"
-    sp_txt = "%s" % sp_lb + "\n"
-
-    sp_pt = TPaveText(0.1,0.1,0.9,0.9)
-    sp_pt.SetFillColor(22)
-
-    sp_latex = TLatex()
-    sp_latex.SetTextAlign(12)
-
-    txt_dict=dict()
-    ltxt_dict = dict()
-    for s in sp_lb:
-        txt_dict[s] = "%20s" % s
-        ltxt_dict[s] = list()
-    np_txt = ""
-    for np in np_list:
-        size = spSumTree.Draw('elap_time_x:cpu_time_x:elap_time_x-cpu_time_x', "np==%i" % int(np), 'goff');
-        elapv = spSumTree.GetV1();
-        cpuv = spSumTree.GetV2();
-        elap_cpuv = spSumTree.GetV3();
-        #userv = spSumTree.GetV4();
-
-        elap_time_stdev.append(float(TMath.RMS(size, elapv )))
-        elap_time_avg.append(float(TMath.Mean(size, elapv )))
-        elap_time_max.append(float(TMath.MaxElement(size, elapv )))
-
-        cpu_time_stdev.append(float(TMath.RMS(size, cpuv )))
-        cpu_time_avg.append(float(TMath.Mean(size, cpuv )))
-        cpu_time_max.append(float(TMath.MaxElement(size, cpuv )))
-
-        elap_cpu_time.append(float(TMath.Mean(size, elap_cpuv)))
-
-        #elap_time_rate.append( float(60*ne)/float(np*elap_time_avg[-1]) )
-        #cpu_time_rate.append( float(60*ne)/float(np*cpu_time_avg[-1]) )
-        #user_time_rate.append( float(60*ne)/float( np * float(TMath.Mean(size, userv))) )
-
-        elap_time_rate.append( float(60*ne)/float(elap_time_avg[-1]) )
-        cpu_time_rate.append( float(60*ne)/float(cpu_time_avg[-1]) )
-        total_rate.append(np * elap_time_rate[-1])
-
-        #user_time_rate.append( float(60*ne)/float(float(TMath.Mean(size, userv))) )
-
-        #elap_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV2())))
-        #cpu_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV4())))
-
-        #msize = mpSumTree.Draw('m_par_time', "np==%i" % int(np), 'goff')
-        #parv = mpSumTree.GetV1();
-        #par_elap_time.append(float(TMath.Mean(msize, parv)))
-        #par_event_rate.append(float(60.0*ne)/par_elap_time[-1])
-
-        np_txt += "%10s" % np
-        for s in sp_lb:
-            txt_dict[s] += "%10.1f" % getTreeList(spSumTree, s, "np==%i" % int(np) )[0]
-            ltxt_dict[s].append( "%10.1f" % getTreeList(spSumTree, s, "np==%i" % int(np))[0] )
-
-    print ("np_list=%s\n etime_stdev=%s \n cpu_time_stdev=%s" % (np_list, elap_time_stdev, cpu_time_stdev))
-    print ("elap-cpu=%s" % (elap_cpu_time))
-
-    from socket import gethostname
-    import platform
-#########################
-    graph_list = list()
-###########################
-    graph =dict()
-    graph['type'] = 'text'
-    graph['data'] = [
PLOTS for ", - " %s " % root_file, - " machine: %s" % gethostname(), - #"%s, %s, %s, %s, %s, %s" % platform.uname(), - "%s" % platform.platform(), - "%s" % os.getenv('CMTCONFIG'), - " comments: %s" % comments, - "np=%s " % [int(s) for s in np_list] - ] - graph['title'] = 'ATHENA MP MONITOR mpMon REPORT' - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'latex' - graph['data'] = ltxt_dict - graph['title']= "sp_summary numbers:" - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - #'parallel_elap': (np_list, par_elap_time), - 'proc_elap_avg': (np_list, elap_time_avg), - 'proc_elap_max': (np_list, elap_time_max), - 'proc_cpu_avg': (np_list, cpu_time_avg), - 'proc_cpu_max': (np_list, cpu_time_max), - #'elap_time_stdev_x': (np_list, elap_time_stdev_x), - #'cpu_time_stdev_x': (np_list, cpu_time_stdev_x) - } - graph['title'] = "Processes Elap and CPU Time Variations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time, sec" - graph_list.append(graph) - -############################ - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "This plot intentially left blank" - ] - graph['title'] = 'BLANK CHART' - graph_list.append(graph) - - """ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - "m_elap_time" : (mpSumTree, "m_elap_time:np"), - "m_firstevent_time": (mpSumTree, "m_firstevent_time:np"), - "m_par_time" : (mpSumTree, "m_par_time:np"), - "m_merging_time": (mpSumTree, "m_merging_time:np") - } - graph['title'] = "PARENT PROCESS TIMES" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Time, sec" - graph_list.append(graph) - -############################## - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "This plot intentially left blank" - ] - graph['title'] = 'BLANK CHART' - graph_list.append(graph) - """ -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'elap_time_stdev': (np_list, elap_time_stdev), - 'cpu_time_stdev': (np_list, cpu_time_stdev), - 'elap-cpu_avg': (np_list, elap_cpu_time), - } - graph['title'] = "Processes Time Deviations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time_StDev, sec" - graph_list.append(graph) - -############################ - """ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - #"event_rate": (mpSumTree, "event_rate:np"), - #"event_rate_x": (mpSumTree, "(60*%i)/(x_par_time):np" % ne), - #"event_rate" : (mpSumTree, "(60*%i)/(m_par_time):np" % ne), - #"per_proc_rate": (mpSumTree, "event_proc_rate:np") - "event_rate" : (mpSumTree, "(60*%i*np)/(m_par_time):np" % ne), - "per_proc_rate": (mpSumTree, "event_proc_rate*np:np") - #"per_proc_rate_x": (mpSumTree, "(60*%i)/(x_par_time*np):np" % ne) - } - graph['title'] = "ATHENA MP EVENT PROCESSING RATE" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Evts/min, Evts/proc/min" - graph_list.append(graph) - """ - -############################ - - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'total_rate': (np_list, total_rate), - 'elap_avg_rate': (np_list, elap_time_rate), - 'cpu_avg_rate': (np_list, cpu_time_rate) - #'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Worker event processing rates Wall-Time, User-CPU, Total-CPU time rates, averaged." 
- graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - #'parallel_rate': (mpSumTree, "event_rate:np"), #"(event_rate + 207/(30*(np-2)+m_par_time)):np"), - 'worker_rate': (spSumTree, "(60*%i)/(elap_time_x):np" % ne), - 'cpu_rate': (spSumTree, "(60*%i)/(cpu_time_x):np" % ne), - #'user_rate': (spSumTree, "(60*%i)/(user_time_x):np" % ne) - } - graph['title'] = "Event Throughput per Process, wall-clock time" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph['goptions'] = "ALP" - graph_list.append(graph) -############################ - - graph = dict() - graph['type'] = 'list' - graph['data'] = { - #'par_event_rate': (np_list, par_event_rate), - 'elap_avg_rate': (np_list, elap_time_rate), - 'cpu_avg_rate': (np_list, cpu_time_rate), - #'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Worker event processing rates Wall-Time, User-CPU, Total-CPU time rates, averaged." - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################# - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - "total_mem": (spSumTree, "free_mem_spike/1024:np"), - "mem_per_proc": (spSumTree, "free_mem_spike/np/1024:np"), - } - graph['title'] = "PHYSICAL MEMORY CONSUMPTION BY Athena MJ (Multi Jobs)" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - - cond = "vmem!=0" -############################# - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - "mem_per_proc": (spSumTree, "free_mem_spike/np/1024:np"), - "vmem": (spSumTree, ("vmem:np", cond) ), - "rss": (spSumTree, ("rss:np", cond) ) - } - graph['title'] = "VMEM, RSS, RealMemory(from free-spike) per Proc" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - -############################ - cond = "np>0" -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "bread/sec": (ioTree, ('breadps:Time', cond) ), - "bwrite/sec": (ioTree, ('bwrtnps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MJ (Multi Jobs) %s " % cond - graph['xtitle']= "Time" - graph['ytitle']= "Total Amount of Data R/W in blocks per sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "write_reqs/sec": (ioTree, ('wtps:Time', cond) ), - "read_reqs/sec": (ioTree, ('rtps:Time', cond) ), - "total_reqs/sec": (ioTree, ('tps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Transfer_requests/sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - '%_user' : (cpuTree, ('puser:Time', cond) ), - '%_system': (cpuTree, ('psystem:Time', cond) ), - '%_idle': (cpuTree, ('pidle:Time', cond) ), - '%_io_wait': (cpuTree, ('piowait:Time', cond) ) - } - graph['title'] = "CPU Activity for Athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - 
graph['logY'] = True - graph['data'] ={ - '%_io_wait': (cpuTree, ('piowait:Time', cond) ), - } - graph['title'] = "CPU Activity for Athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -######################### - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - 'numa_hit': (numaTree, ('numa_hit:Time', cond) ), - 'numa_miss':(numaTree,('numa_miss:Time', cond) ), - 'numa_foreign': (numaTree, ('numa_foreign:Time', cond) ), - 'interleave_hit': (numaTree, ('interleave_hit:Time', cond) ), - 'local_node': (numaTree, ('local_node:Time', cond) ), - 'other_node': (numaTree, ('other_node:Time', cond) ), - } - graph['title'] = "NUMA Activity for athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Nbr of hits/misses" - graph['text']= "np = %s" % np_list - graph_list.append(graph) - - - - pads = list() - - cpp = 1 #charts per pad - ppc = 2 #pads per canvas - - c.cd() - gStyle.SetOptStat(0); - gStyle.SetPalette(1); - gStyle.SetCanvasColor(33); - gStyle.SetFrameFillColor(10); - gStyle.SetMarkerStyle(21) - gStyle.SetMarkerColor(2) - gStyle.SetMarkerSize(0.4) - print ("gStyle.Set done") - - title = TPaveLabel(0.1,0.98,0.9,1, "Athena MP Plots"); - title.SetFillColor(42); title.SetTextFont(40); - #title.Draw();print ("title Drawn") - - mgs = list() #List of TMultiGraphs - ls = list() #List of TLegends - gs = list() #List of TGraph - - for j in range(ppc): - y_factor = 0.99; x1 = 0.01; x2 = 0.99; y1 = y_factor - (y_factor-0.01)*(j+1)/float(ppc); y2 = y_factor - (y_factor-0.01)*j/float(ppc) - print ("x1,y1,x2,y2", x1, y1, x2, y2 ) - pad = TPad("pad%i" % j, "pad%i" % j, x1, y1, x2, y2, 33); pad.Draw() - pads.append(pad); - - num_cans = len(graph_list) /(cpp*ppc) if len(graph_list) % (cpp*ppc)==0 else len(graph_list)/(cpp*ppc) + 1 - graph_list += [None,]* (num_cans*cpp*ppc - len(graph_list)) - print ("number of pages/canvases in report = ", num_cans) - - pdf_file = root_file - for s in ['merged.', '.py', '.root']: - pdf_file = pdf_file.replace(s, '') - pdf_file ="%s.pdf" % pdf_file - - for i in range(num_cans): - for j in range(ppc): - graph = graph_list[ppc*i+j] - if graph is None: - continue - - if 'logY' in graph.keys(): - if graph['logY']: - pads[j].SetLogy() - else: - pads[j].SetLogy(0) - - pads[j].cd() - pads[j].SetRightMargin(0.2) - l = TLegend(0.82,0.20,0.99,0.89); ls.append(l) - mg = TMultiGraph(); mgs.append(mg) - print ("graph=", graph) - gs.append(MakeMultiGraph(graph, mg, l)) - - c.Update() - if i == 0: - print ("pdf.start") - c.Print(pdf_file+'(', 'pdf') #start page - elif i < num_cans-1: - print ("pdf.body") - c.Print(pdf_file, 'pdf') #body pages - else: - print ("pdf.end") - c.Print(pdf_file + ')', 'pdf') #end page - c.SaveAs("%s.%i.png" % (pdf_file, i)) - for pad in pads: - pad.Clear() - -def report(root_file, ne = 0, comments=""): - print(' mpMonTools.report(): root_file=', root_file) - from ROOT import TFile, TTree, TBranch, TCanvas, TPad, TGraph, TLegend, TMultiGraph, gStyle, TLatex, TPaveLabel, TPaveText, TH2I, TMath - - def getTreeList(tree, column, condition): - size = tree.Draw(column, condition,'goff'); - v1 = tree.GetV1(); v1.SetSize(size) - return list(v1) - - def makeGraph(tree, name, formula, condition="", color = 1, lineWidth=1): - tree.Draw(formula, condition, "goff") - graph = TGraph(int(tree.GetSelectedRows()), tree.GetV2(), tree.GetV1()) - 
graph.SetLineColor(color); - graph.SetLineWidth(lineWidth) - graph.SetName(name); - return graph - - def MakeMultiGraph(graph_data, mg, l): - clr = 1 - gl = list() - - if graph_data is None: - return [] - line_blank = 1 - if 'noline' in graph_data.keys(): - line_blank=0 - - if graph_data['type'] is 'graph': - for name, (tree, param) in graph_data['data'].items(): - clr+=1; formula =''; condition='' - if type(param).__name__=='tuple': - formula = param[0] - condition = param[1] - else: - print ("MakeMG: ", formula, condition) - formula = param - condition = "" - - print ("name=%s, tree=%s, formula=%s, condition=%s" % (name, tree.GetName(), formula, condition) ) - - tree.Draw(formula, condition, "goff") - - selection_size = tree.GetSelectedRows() - if selection_size==-1: - print ("-> SKIPPED (DO NOT EXIST): SELECTION_SIZE=%i" % selection_size ) - continue - else: - print ("-> SELECTION_SIZE=%i" % selection_size ) - pass - - g = TGraph(selection_size, tree.GetV2(), tree.GetV1()); gl.append(g) - - g.SetName(name); g.SetLineColor(clr*line_blank); g.SetMarkerColor(clr); g.SetLineWidth(0) - #if "rate" in name: - # g.SetLineColor(0) - - mg.Add(g); - l.AddEntry(g, name) - - if graph_data['type'] is 'list': - for name, (lx,ly) in graph_data['data'].items(): - print ("name=%s" % name); print (lx); print (ly) - clr+=1 - g = TGraph( len(lx), array.array('f', lx), array.array('f', ly) ) - g.SetName(name); g.SetLineColor(clr*line_blank); g.SetLineWidth(1); g.SetMarkerColor(clr); - mg.Add(g) - l.AddEntry(g, name) - gl.append(g) - - if graph_data['type'] is 'array': - clr = 1 - g_list = list() - data = graph_data['data'] - for name,(x,y) in graph_data['data'].items(): - print (x); print (y) - clr+=1; - g = TGraph(len(x), x, y) - g.SetName(name); g.SetLineColor(clr*line_blank); g.SetLineWidth(1); g.SetMarkerColor(clr) - gl.append(g) - mg.Add(g); - l.AddEntry(g, name) - if graph_data['type'] is 'text': - title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC") - for s in graph_data['data']: - print ("graph_data['data']=%s" % s) - sp_pt.AddText(s) - sp_pt.SetTextAlign(12); - sp_pt.SetTextSize(0.04) - sp_pt.Draw() - return [] - - if graph_data['type'] is 'latex': - title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC") - tl = TLatex(); tl.SetTextSize(0.02); tl.SetTextAlign(12); - txtd = graph_data['data'] - i = 0; x0 = 0.05; y0 = 0.90; dx = 0.08; dy = 0.05 - x1 = x0 - tl.DrawLatex(x1, y0, 'np') - for s in txtd['np']: - x1 = x1 + dx - tl.DrawLatex(x1, y0, s) - txtd.pop('np') - - for k in txtd.keys(): - y0 = y0 - dy - tl.DrawLatex(x0-0.03, y0, k); - x1 = x0 - for s in txtd[k]: - x1 = x1 + dx - tl.DrawLatex(x1, y0, s) - return [] - if 'goptions' in graph_data.keys(): - mg.Draw(graph_data['goptions']) - else: - mg.Draw('ALP') - - l.Draw() - h=mg.GetHistogram(); h.SetXTitle(graph_data['xtitle']); h.SetYTitle(graph_data['ytitle']); h.SetMinimum(0.1); - title.DrawPaveLabel(0.1,0.91,0.9,0.99, graph_data['title'], "brNDC") - if 'text' in graph_data.keys(): - title.DrawPaveLabel(0.2,0.88,0.8,0.92, graph_data['text'], "brNDC") - #text_box = TPaveText(0.2,0.51,0.8,0.54); text_box.AddText(graph_data['text']); - #text_box.SetFillColor(0); text_box.SetTextAlign(12); text_box.SetTextfONt(40); text_box.Draw(); - - return [] # gl #list of TGraph - - c = TCanvas("mpr", "AthenaMJ-mp-scaling-charts", 1, 1, 800, 1024) - c.SetFillColor(0); c.SetBorderSize(1); c.cd() - - tfile = TFile(root_file, "READ"); print (" root compression factor = ", tfile.GetCompressionFactor()) - spSumTree = tfile.Get("sp_summary") 
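tfile.Get() here and in the next few lines returns a null object when the requested tree is absent, and the code then dereferences the result without checking. A hedged sketch of a more defensive lookup; the helper name is ours, not part of the module:

import ROOT

def get_tree(tfile, name):
    """Fetch a TTree from an open TFile or fail with a readable message."""
    tree = tfile.Get(name)
    if not tree:  # in PyROOT a missing object evaluates as false
        raise RuntimeError("tree '%s' not found in %s" % (name, tfile.GetName()))
    return tree

# spSumTree = get_tree(tfile, "sp_summary")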
- #cpSumTree = tfile.Get("cp_summary") - ioTree = tfile.Get("io") - cpuTree = tfile.Get("cpu") - numaTree = tfile.Get("numa") - memTree = tfile.Get("mem") - - if ne is 0: - ne = int(root_file.split('.')[-2].replace('ne', '')) - print ("extracted ne=[%i]" % ne) - -##### FORMING THE DATA FOR ROOT Graphing-Charting-Histogramming ##### - np_list = list(set(getTreeList(spSumTree, 'np', ''))); np_list.sort() #uniqeify and sort np_list - elap_time_stdev = list() - elap_time_avg = list() - elap_time_max = list() - cpu_time_stdev = list() - cpu_time_max = list() - cpu_time_avg = list() - - total_rate = list() - elap_time_rate = list() - cpu_time_rate = list() - user_time_rate = list() - par_event_rate = list() - - elap_cpu_time = list() # elap - cpu time avg. - - elap_time_stdev_x = list() - cpu_time_stdev_x = list() - par_elap_time = list() # elap time as seen from mother - - sp_lb = [b.GetName() for b in list(spSumTree.GetListOfBranches())] - #sp_lb = [b.GetName() for b in list(spSumTree.GetListOfBranches())] - - #mp_txt = "%s" % mp_lb + "\n" - sp_txt = "%s" % sp_lb + "\n" - - sp_pt = TPaveText(0.1,0.1,0.9,0.9) - sp_pt.SetFillColor(0) - - sp_latex = TLatex() - sp_latex.SetTextAlign(12) - - txt_dict=dict() - ltxt_dict = dict() - for s in sp_lb: - txt_dict[s] = "%20s" % s - ltxt_dict[s] = list() - ltxt_dict["total_rate"] = list() - ltxt_dict["proc_rate_avg"] = list() - - np_txt = "" - for np in np_list: - size = spSumTree.Draw('(elap_time_x-init_time_x):cpu_time_x:elap_time_x-cpu_time_x', "np==%i" % int(np), 'goff'); - elapv = spSumTree.GetV1(); - cpuv = spSumTree.GetV2(); - elap_cpuv = spSumTree.GetV3(); - #userv = spSumTree.GetV4(); - - elap_time_stdev.append(float(TMath.RMS(size, elapv ))) - elap_time_avg.append(float(TMath.Mean(size, elapv ))) - elap_time_max.append(float(TMath.MaxElement(size, elapv ))) - - cpu_time_stdev.append(float(TMath.RMS(size, cpuv ))) - cpu_time_avg.append(float(TMath.Mean(size, cpuv ))) - cpu_time_max.append(float(TMath.MaxElement(size, cpuv ))) - - elap_cpu_time.append(float(TMath.Mean(size, elap_cpuv))) - - #elap_time_rate.append( float(60*ne)/float(np*elap_time_avg[-1]) ) - #cpu_time_rate.append( float(60*ne)/float(np*cpu_time_avg[-1]) ) - #user_time_rate.append( float(60*ne)/float( np * float(TMath.Mean(size, userv))) ) - - elap_time_rate.append( float(60*ne)/float(elap_time_avg[-1]) ) - cpu_time_rate.append( float(60*ne)/float(cpu_time_avg[-1]) ) - total_rate.append(np * elap_time_rate[-1]) - - #user_time_rate.append( float(60*ne)/float(float(TMath.Mean(size, userv))) ) - - #elap_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV2()))) - #cpu_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV4()))) - - #msize = mpSumTree.Draw('m_par_time', "np==%i" % int(np), 'goff') - #parv = mpSumTree.GetV1(); - #par_elap_time.append(float(TMath.Mean(msize, parv))) - #par_event_rate.append(float(60.0*ne)/par_elap_time[-1]) - - np_txt += "%10s" % np - for s in sp_lb: - gtl = getTreeList(spSumTree, s, "np==%i" % int(np) ) - print ("%s: getTreeList: %s" % (s,gtl), end='') - gtl_avg = meanList(gtl) - print (" avg=%10.1f" % gtl_avg) - txt_dict[s] += "%10.1f" % gtl_avg - ltxt_dict[s].append( "%10.1f" % gtl_avg) - ltxt_dict["total_rate"].append("%10.1f" % - ( 60.0*float(np)*float(ne)/( float(ltxt_dict["elap_time_x"][-1]) - float(ltxt_dict["init_time_x"][-1]) ) ) ) - ltxt_dict["proc_rate_avg"].append("%10.1f" % - ( 60.0*float(ne)/( float(ltxt_dict["elap_time_x"][-1]) - float(ltxt_dict["init_time_x"][-1]) ) ) ) - - print ("np_list=%s\n etime_stdev=%s \n cpu_time_stdev=%s" % 
(np_list, elap_time_stdev, cpu_time_stdev)) - print ("elap-cpu=%s" % (elap_cpu_time)) - - from socket import gethostname - import platform - graph_list = list() -########################### - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "MJ Times, Memory, IO, CPU PLOTS for ", - " %s " % root_file, - " machine: %s" % gethostname(), - #"%s, %s, %s, %s, %s, %s" % platform.uname(), - "%s" % platform.platform(), - "%s" % os.getenv('CMTCONFIG'), - " comments: %s" % comments, - "np=%s " % [int(s) for s in np_list] - ] - graph['title'] = 'ATHENA MJ MONITOR mjMon REPORT' - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'latex' - graph['data'] = ltxt_dict - graph['title']= "mj_summary numbers:" - graph_list.append(graph) - -######################### - - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'total_rate': (np_list, total_rate), - #'elap_avg_rate': (np_list, elap_time_rate), - #'cpu_avg_rate': (np_list, cpu_time_rate) - #'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Total Event processing rate, averaged from Wall-Time rates." - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - #'parallel_elap': (np_list, par_elap_time), - 'proc_elap_avg': (np_list, elap_time_avg), - 'proc_elap_max': (np_list, elap_time_max), - 'proc_cpu_avg': (np_list, cpu_time_avg), - 'proc_cpu_max': (np_list, cpu_time_max), - #'elap_time_stdev_x': (np_list, elap_time_stdev_x), - #'cpu_time_stdev_x': (np_list, cpu_time_stdev_x) - } - graph['title'] = "Processes Elap and CPU Time Variations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time, sec" - graph_list.append(graph) - -############################ - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "This plot intentially left blank" - ] - graph['title'] = 'BLANK CHART' - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'elap_time_stdev': (np_list, elap_time_stdev), - 'cpu_time_stdev': (np_list, cpu_time_stdev), - 'elap-cpu_avg': (np_list, elap_cpu_time), - } - graph['title'] = "Processes Time Deviations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time_StDev, sec" - graph_list.append(graph) - -############################ - - graph = dict() - graph['type'] = 'list' - graph['noline']=0 - graph['data'] = { - 'total_rate': (np_list, total_rate), - 'elap_avg_rate': (np_list, elap_time_rate), - 'cpu_avg_rate': (np_list, cpu_time_rate) - #'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Worker event processing rates Wall-Time, User-CPU, Total-CPU time rates, averaged." 
- graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['noline']=0 - graph['data'] = { - #'parallel_rate': (mpSumTree, "event_rate:np"), #"(event_rate + 207/(30*(np-2)+m_par_time)):np"), - 'worker_rate': (spSumTree, "(60*%i)/(elap_time_x):np" % ne), - 'worker_rate_': (spSumTree, "(60*%i)/(elap_time_x-200):np" % ne), - 'cpu_rate': (spSumTree, "(60*%i)/(cpu_time_x):np" % ne), - #'user_rate': (spSumTree, "(60*%i)/(user_time_x):np" % ne) - } - graph['title'] = "Event Throughput per Process, wall-clock time" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph['goptions'] = "ALP" - graph_list.append(graph) -############################ - - graph = dict() - graph['type'] = 'list' - graph['noline']=0 - graph['data'] = { - #'par_event_rate': (np_list, par_event_rate), - 'elap_avg_rate': (np_list, elap_time_rate), - 'cpu_avg_rate': (np_list, cpu_time_rate), - #'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Worker event processing rates Wall-Time, User-CPU, Total-CPU time rates, averaged." - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################# - graph = dict() - graph['type'] = 'graph' - graph['noline']=0 - graph['data'] ={ - "total_mem": (spSumTree, "free_mem_spike:np"), - "mem_per_proc": (spSumTree, "free_mem_spike/np:np"), - } - graph['title'] = "PHYSICAL MEMORY CONSUMPTION BY Athena MJ (Multi Jobs)" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - - cond = "vmem!=0" -############################# - graph = dict() - graph['type'] = 'graph' - graph['noline']=0 - graph['data'] ={ - "mem_per_proc": (spSumTree, "free_mem_spike/np:np"), - "vmem": (spSumTree, ("vmem:np", cond) ), - "rss": (spSumTree, ("rss:np", cond) ) - } - graph['title'] = "VMEM, RSS, RealMemory(from free-spike) per Proc" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - -############################ - cond="np>0" - graph = dict() - graph['type'] = 'graph' - graph['logY'] = False - graph['data'] ={ - 'free_mem': (memTree, ('kbmemfree/1024:Time', cond) ), - 'used_mem': (memTree, ('kbmemused/1024:Time', cond) ), - 'cached_mem': (memTree, ('kbcached/1024:Time', cond) ), - 'buffers_mem': (memTree, ('kbbuffers/1024:Time', cond) ), - 'kbswpused': (memTree, ('kbswapused/1024:Time', cond) ) - #'commit_mem': (memTree, ('kbcommit/1024:Time', cond) ) - } - graph['title'] = "Memory Activity for Athena MJ %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Memory, Mb " - #graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - cond = "np>0" -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "bread/sec": (ioTree, ('breadps:Time', cond) ), - "bwrite/sec": (ioTree, ('bwrtnps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MJ (Multi Jobs) %s " % cond - graph['xtitle']= "Time" - graph['ytitle']= "Total Amount of Data R/W in blocks per sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "write_reqs/sec": (ioTree, ('wtps:Time', cond) ), - "read_reqs/sec": (ioTree, ('rtps:Time', cond) ), - "total_reqs/sec": (ioTree, ('tps:Time', cond) ) 
- } - graph['title'] = "IO Activity for Athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Transfer_requests/sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - '%_user' : (cpuTree, ('puser:Time', cond) ), - '%_system': (cpuTree, ('psystem:Time', cond) ), - '%_idle': (cpuTree, ('pidle:Time', cond) ), - '%_io_wait': (cpuTree, ('piowait:Time', cond) ) - } - graph['title'] = "CPU Activity for Athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - '%_io_wait': (cpuTree, ('piowait:Time', cond) ), - } - graph['title'] = "CPU Activity for Athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -######################### - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - 'numa_hit': (numaTree, 'numa_hit:Time' ), - 'numa_miss':(numaTree,'numa_miss:Time' ), - 'numa_foreign': (numaTree, 'numa_foreign:Time' ), - 'interleave_hit': (numaTree, 'interleave_hit:Time' ), - 'local_node': (numaTree, 'local_node:Time' ), - 'other_node': (numaTree, 'other_node:Time') - } - graph['title'] = "NUMA Activity for athena MJ (Multi Jobs) %s, Logscale" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Nbr of hits/misses" - graph['text']= "np = %s" % np_list - graph_list.append(graph) - -######################### - graph = dict() - graph['type'] = 'graph' - #graph['logY'] = False - graph['data'] ={ - 'numa_hit': (numaTree, 'numa_hit:Time' ), - 'numa_miss':(numaTree,'numa_miss:Time' ), - 'numa_foreign': (numaTree, 'numa_foreign:Time' ), - 'interleave_hit': (numaTree, 'interleave_hit:Time' ), - 'local_node': (numaTree, 'local_node:Time' ), - 'other_node': (numaTree, 'other_node:Time' ) - } - graph['title'] = "NUMA Activity for athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Nbr of hits/misses" - graph['text']= "np = %s" % np_list - graph_list.append(graph) - - - pads = list() - - cpp = 1 #charts per pad - ppc = 1 #pads per canvas - - c.cd() - gStyle.SetOptStat(0); - gStyle.SetPalette(0); - gStyle.SetCanvasColor(0); - gStyle.SetFrameFillColor(0); - gStyle.SetMarkerStyle(21) - gStyle.SetMarkerColor(2) - gStyle.SetMarkerSize(0.5) - print ("gStyle.Set done") - - title = TPaveLabel(0.1,0.98,0.9,1, "Athena MJ Plots"); - title.SetFillColor(0); title.SetTextFont(40); - #title.Draw();print ("title Drawn") - - mgs = list() #List of TMultiGraphs - ls = list() #List of TLegends - gs = list() #List of TGraph - - for j in range(ppc): - y_factor = 0.99; x1 = 0.01; x2 = 0.99; y1 = y_factor - (y_factor-0.01)*(j+1)/float(ppc); y2 = y_factor - (y_factor-0.01)*j/float(ppc) - print ("x1,y1,x2,y2", x1, y1, x2, y2 ) - pad = TPad("pad%i" % j, "pad%i" % j, x1, y1, x2, y2, 0); pad.Draw() - pads.append(pad); - - num_cans = len(graph_list) /(cpp*ppc) if len(graph_list) % (cpp*ppc)==0 else len(graph_list)/(cpp*ppc) + 1 - graph_list += [None,]* (num_cans*cpp*ppc - len(graph_list)) - print ("number of pages/canvases in report = ", num_cans) - - pdf_file = root_file - for s in ['merged.', '.py', '.root']: - 
pdf_file = pdf_file.replace(s, '') - pdf_file ="%s.pdf" % pdf_file - - for i in range(num_cans): - for j in range(ppc): - graph = graph_list[ppc*i+j] - if graph is None: - continue - - if 'logY' in graph.keys(): - if graph['logY']: - pads[j].SetLogy() - else: - pads[j].SetLogy(0) - - pads[j].cd() - pads[j].SetRightMargin(0.2) - l = TLegend(0.82,0.20,0.99,0.89); ls.append(l) - mg = TMultiGraph(); mgs.append(mg) - print ("graph=", graph) - gs.append(MakeMultiGraph(graph, mg, l)) - - c.Update() - if i == 0: - print ("pdf.start") - c.Print(pdf_file+'(', 'pdf') #start page - elif i < num_cans-1: - print ("pdf.body") - c.Print(pdf_file, 'pdf') #body pages - else: - print ("pdf.end") - c.Print(pdf_file + ')', 'pdf') #end page - #c.SaveAs("%s.%i.png" % (pdf_file, i)) - c.SaveAs("%s.%i.C" % (pdf_file, i)) - for pad in pads: - pad.Clear() - -########## THE END ################ - diff --git a/Control/AthenaMP/python/tests/mpMonTools.py b/Control/AthenaMP/python/tests/mpMonTools.py deleted file mode 100644 index 9d59eb9fff18ec136f68f65cc47f168a1753e935..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/python/tests/mpMonTools.py +++ /dev/null @@ -1,2411 +0,0 @@ -# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration - -# @file: mpMonTools.py -# @purpose: Library for mp performance monitoring of AthenaMP -# @author: Mous Tatarkhanov <tmmous@cern.ch> -# @date: December 2009 - -from __future__ import print_function - -__version__ = "$Revision: 329336 $" -__author__ = "Mous Tatarkhanov <tmmous@cern.ch>" - -import sys -import os -import signal -import time -import array -import copy -import six - -from future import standard_library -standard_library.install_aliases() -import subprocess - - -T0 = time.time() -numa_T0 = T0 -TIME_STEP = 10 -KB = (1 << 10) -MB = (1 << 20) -GB = (1 << 30) - - -PAGESIZE=os.sysconf("SC_PAGE_SIZE")/1024 #KiB -have_pss=0 - -mp_stat = dict() #dictionary which will hold all statistics: ProcDict objects, io-,mem-,cpu-dicts -pid_list = list() #book-keeping of all spawned and launched processes for later clean-up -init_mem = list() -init_numa = list() - -def init_mp_stat(): - global mp_stat - global pid_list - global T0 - global numa_T0 - global init_mem - global init_numa - - del mp_stat #delete data from previous np - del pid_list #delete data from previous np - - #brand new mp_stat - mp_stat = dict() - pid_list = list() - mp_stat['io'] = dict() - mp_stat['mem'] = dict() - mp_stat['cpu'] = dict() - mp_stat['cpid'] = dict() - mp_stat['mpid'] = dict() - - mp_stat['numa'] = dict() - mp_stat['numa']['Time'] = list() - mp_stat['numa']['numa_hit'] = list() - mp_stat['numa']['numa_miss'] = list() - mp_stat['numa']['numa_foreign'] = list() - mp_stat['numa']['interleave_hit'] = list() - mp_stat['numa']['local_node'] = list() - mp_stat['numa']['other_node'] = list() - - T0 = time.time() - init_mem = get_memstat() - init_numa = list(get_numastat()) - init_numa[0] = numa_T0 - - print ("initial_mem=%s" % init_mem) - print ("initial_numa=%s" % list(init_numa)) - - init_mem = get_memstat() - -class ProcDict(dict): - """this is to store the process information""" - key_list = ['state', 'vmem', 'rss', 'sz', 'cpu_time', 'elap_time', 'private', 'shared'] - pid = -1 - start_time = -1 - elap_time = -1 - cpu_time = -1 - vmem = -1 - rss = -1 - nbr_rows = -1 # the number of rows - - def __init__(self, pid=-1, start_time=-1, child = True): - self.pid = pid - self.start_time = start_time - #self['np']=np - - for key in self.key_list: - self[key] = list() - self.proc_ps_stat() 
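proc_ps_stat(), called just above and defined further below, samples the process by shelling out to ps once per time step. Stripped of the class bookkeeping, one sampling step amounts to the following sketch; the returned dict keys are ours, while the ps format string is the one this module uses:

import subprocess

def sample_process(pid):
    """Take one ps snapshot of pid; return raw fields, or None if it has exited."""
    out = subprocess.getoutput(
        "ps --pid %i -o pid,state,vsize,rss,sz,start,cputime,etime" % pid)
    lines = out.splitlines()
    if len(lines) < 2:   # header only: the process is gone
        return None
    f = lines[1].split()
    return {"state": f[1], "vsize_kb": int(f[2]), "rss_kb": int(f[3]),
            "cputime": f[6], "etime": f[7]}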
-        if child:
-            mp_stat["cpid"][self.pid] = self
-        else:
-            mp_stat["mpid"] = self
-
-    def add_ps_line(self, line):
-        ps_str = line.split()
-        if self.start_time == -1:
-            self.start_time = _seconds(ps_str[5])
-        self['state'].append( ps_str[1] )
-        self['vmem'].append( int(ps_str[2])/1024 )
-        self['rss'].append( int(ps_str[3])/1024 )
-        self['sz'].append( int(ps_str[4])/1024 )
-        self['cpu_time'].append( _seconds(ps_str[6]) )
-        self['elap_time'].append( _seconds(ps_str[7]) )
-
-        private = shared = -1
-        try:
-            pass
-            #private, shared = _get_shared_private_mem(self.pid)
-        except Exception as e:
-            print ("## Caught exception [%s] !!" % str(e.__class__))
-            print ("## What:", e)
-            print (sys.exc_info()[0])
-            print (sys.exc_info()[1])
-        self["private"].append(private)
-        self["shared"].append(shared)
-
-    def proc_ps_stat(self):
-        """ps statistics for the process with this pid"""
-        out = subprocess.getoutput("ps --pid %i -o pid,state,vsize,rss,sz,start,cputime,etime" % self.pid)
-        lines = out.splitlines()
-        if len(lines) > 1:
-            self.add_ps_line(lines[1])
-        else:
-            print ("there is no process with pid: [%i]" % self.pid)
-            return False
-        return True
-
-    def children_exist(self):
-        """figures out whether the np kids were spawned for mother mpid"""
-        sc, out = subprocess.getstatusoutput("ps --ppid %i -o pid,start" % self.pid)
-        if sc != 0:
-            #print (" children_exist: Error, sc=%i" % sc)
-            return False
-
-        ps_lines = out.splitlines()
-        nc = len(ps_lines)-1
-        print (" children_exist().nbr of children = %i" % nc)
-        if nc > 0:
-            print ("%i children workers exist. Creating ProcDicts..." % nc)
-            ps_lines.pop(0)
-            for line in ps_lines:
-                ps_str = line.split()
-                cpid = int(ps_str[0])
-                ProcDict(cpid, start_time = _seconds(ps_str[1]))
-                print ("..... child [%i] added" % cpid)
-            return nc
-        else:
-            #print ("no children exist for parent: %s " % self.pid)
-            return False
-
-
-    def trim_stat(self):
-        """trim samples taken after the process finished from the end of the statistics"""
-        while self['state']:
-            # S=sleeping, Z=zombie, T=stopped, D=uninterruptible sleep, X=dead
-            if (self['state'][-1] in ['S', 'Z', 'T', 'D', 'X']) or (self['vmem'][-1] == 0) or (self['rss'][-1] == 0):
-                for key in self.key_list:
-                    self[key].pop()
-            else:
-                break
-
-    def summarize(self):
-        """finalize summary information."""
-        self.elap_time = self['elap_time'][-1]
-        self.cpu_time = self['cpu_time'][-1]
-        self.vmem = self['vmem'][-1]
-        self.rss = self['rss'][-1]
-        self.nbr_rows = len(self['elap_time'])
-
-class CPSummary(dict):
-    """Children Processes Summary - a convenient form for ROOT and other output methods"""
-    cpid_list = list()
-    np = 1
-    total_cpu_time = 0
-    total_cpu_time_x = 0
-    total_elap_time = 0
-    total_elap_time_x = 0
-    def __init__(self, np):
-        self.np = np
-        self['pid'] = list()
-        self['elap_time'] = list()
-        self['elap_time_x'] = list()
-        self['cpu_time'] = list()
-        self['user_time'] = list()
-        self['system_time'] = list()
-        self['bootstrap_time'] = list()
-        self['cpu_time_x'] = list()
-        self['vmem'] = list()
-        self['rss'] = list()
-
-    def extract_summary(self, dir):
-        self.cpid_list = mp_stat["cpid"].keys()
-        for pid in self.cpid_list:
-            self['pid'].append(pid)
-            self['elap_time_x'].append(mp_stat['cpid'][pid].elap_time)
-            self['cpu_time_x'].append(mp_stat['cpid'][pid].cpu_time)
-            self.total_cpu_time_x += mp_stat['cpid'][pid].cpu_time
-            self.total_elap_time_x += mp_stat['cpid'][pid].elap_time
-
-            self['vmem'].append(mp_stat['cpid'][pid].vmem)
-            self['rss'].append(mp_stat['cpid'][pid].rss)
-        for pid in self.cpid_list:
-            print (" %s/%s exists ->"
% (dir,pid), os.path.exists(os.path.join(dir,"%s" % pid))) #FIX: add the extraction from cpid's logs. - out_path = os.path.join(dir, "%s" % pid, 'stdout') - err_path = os.path.join(dir, "%s" % pid, 'stderr') - - e_time = float(grepValue(out_path, "WORKER_ELAP_TIME")); - u_time = float(grepValue(out_path, "WORKER_USER_TIME")); - s_time = float(grepValue(out_path, "WORKER_SYSTEM_TIME")); - c_time = u_time + s_time - - self['elap_time'].append(e_time); self.total_elap_time += e_time; - self['user_time'].append(u_time) - self['system_time'].append(s_time) - self['cpu_time'].append(c_time); - self.total_cpu_time += c_time; - - b_time = float(grepValue(out_path, "BOOTSTRAP_ELAP_TIME")); - self['bootstrap_time'].append(b_time); - - - - - -class MPSummary(dict): - """ Mother Process Summary - convenient way of presenting data to ROOT...""" - mpid = dict() - np = 1 - def __init__(self, np): - mpid = mp_stat['mpid'] - self.np = np - #self['np'] = list() - #self['np'] = [ np, ] - self["m_par_time"] = list() - self["m_firstevent_time"]= list() - self["m_cpu_time"]= list() - self["m_elap_time"]= list() - self["x_init_time"]= list()#externally observed time - self["x_par_time"]= list() #externally observed time - self["x_fin_time"]= list() #externally observed time - self["free_mem_spike"]= list() #externally observed value - self["swap_change"] = list() - - def extract_summary(self, log): - self["m_par_time"] = [ float(grepValue(log, "PARALLEL_ELAP_TIME")),] - self['m_merging_time'] = [ float(grepValue(log, "MERGING_ELAP_TIME")),] - self["m_firstevent_time"] = [ float(grepValue(log, "FIRSTEVENT_ELAP_TIME")),] - self["m_cpu_time"] = [ mp_stat["mpid"].cpu_time, ] #mp_stat["mpid"]["cpu_time"][-1] - self["m_elap_time"]= [ mp_stat["mpid"].elap_time, ] #mp_stat["mpid"]["elap_time"][-1] - self["free_mem_spike"]= [ get_spike(sumList(mp_stat['mem']['kbmemfree'], - sumList(mp_stat['mem']['kbbuffers'], mp_stat['mem']['kbcached'])) - ), ] - self["swap_change"] = [ 0,]# get_spike(mp_stat['mem']['kbswpfree']), ] - - - - -####### useful auxillary functions ########################## - -def _seconds(time_str): #handles time in "H:M:S" and "M:S" format - time_nums = time_str.split(":") - if (len(time_nums)==3): - return 3600*int(time_nums[0])+60*int(time_nums[1]) + int(time_nums[2]) - elif (len(time_nums)==2): - return 60*int(time_nums[0]) + int(time_nums[1]) - print ("ERROR: _seconds() returning - 0") - return 0 - -def get_numastat(): - sc,out=subprocess.getstatusoutput("numastat") - if sc==256: - print ("mjMonTools.get_numastat: numastat is not working! 
zeroes will be returned") - return (0,0,0,0,0,0,0) - else: - lines = out.splitlines() - return (time.time(), - int(lines[1].split()[1]), - int(lines[2].split()[1]), - int(lines[3].split()[1]), - int(lines[4].split()[1]), - int(lines[5].split()[1]), - int(lines[6].split()[1]) - ) - -def save_numastat(): - current_numa = get_numastat() - #print ("current_numa=%s" % list(current_numa)) - - _numa_stat = ( - mp_stat['numa']['Time'], - mp_stat['numa']['numa_hit'], - mp_stat['numa']['numa_miss'], - mp_stat['numa']['numa_foreign'], - mp_stat['numa']['interleave_hit'], - mp_stat['numa']['local_node'], - mp_stat['numa']['other_node'] - ) - - change_numa = subList(current_numa,init_numa) - print ("NUMA_CHANGE=%s" % change_numa) - return [_numa_stat[i].append(change_numa[i]) for i in range(len(change_numa))] - -def print_memstat(msg =""): - mem = get_memstat() - t = time.time() - T0; - save_numastat() - print (msg + " [T=%i sec]" % t + " USED[%i Mb][change: %i Mb] - FREE[%i Mb][change: %i Mb]" % ( - mem["USED"], mem["USED"]-init_mem["USED"], mem["FREE"], mem["FREE"]-init_mem["FREE"])) - -def get_memstat(): - out=subprocess.getoutput("free -m") - mem = dict() - lines = out.splitlines() - mem_strs = lines[1].split() - mem['used'] = int(mem_strs[2]) - mem['free'] = int(mem_strs[3]) - mem['cached'] = int(mem_strs[5]) - mem['buffers'] = int(mem_strs[6]) - mem_strs = lines[2].split() - mem['USED'] = int(mem_strs[2]) - mem['FREE'] = int(mem_strs[3]) - #print ("mem: [%s Mbs]" % mem) - return mem - -init_mem = get_memstat() - -def meanList(num_list): - """finds average value of the number list""" - if len(num_list) == 0: - print ("meanList: WARNING - empty list, returning 0.0") - return 0.0 - return float(sum(num_list)) / len(num_list) - -def sumList(l1, l2): - """sum up values of two lists l1 + l2""" - if len(l1) is not len(l2): - print ("sumList: WARNING: len(l1) not equals len(l2)") - n = len(l1) if len(l2) > len(l1) else len(l2) - else: - n = len(l1) - - sum = list() - for i in range(n): - sum.append(l1[i] + l2[i]) - return sum - -def subList(l1, l2): - """subtract values of two lists: l1 - l2""" - if len(l1) is not len(l2): - print ("subList: WARNING: len(l1) not equals len(l2)") - n = len(l1) if len(l2) > len(l1) else len(l2) - else: - n = len(l1) - - sub = list() - for i in range(n): - sub.append(l1[i] - l2[i]) - return sub - -def get_spike(l): - #print (" get_spike:",) - #print (" e0 = ", l[0]/1024, "Mb", end='') - #print (" eN = ", l[-1]/1024, "Mb", end='') - #print (" max = ", max(l)/1024, "Mb", end='' ) - #print (" min = ", min(l)/1024, "Mb", end='') - #print (" e0 - eN = ", (l[0] - l[-1])/1024, "Mb", end='') - #print (" e0 - min = ", (l[0] - min(l))/1024, "Mb", end='') - #print (" eN - min = ", (l[-1] - min(l))/1024, "Mb", end='') - #print (" return max - min =", (max(l) - min(l))/1024, "Mb") - return max(l) - min(l) - -def prepare_mp_stat(): - """ this function construct "cp" dict from "cpid" dict for ROOT formation convenience - converting: cpid:pid:vmem,rss,cpu-time,... -> cp:pid,vmem,rss,cputime,... 
""" - mp_stat['cp'] = mp_stat['cpid'] - del mp_stat['cpid'] - mp_stat['cpid']=dict() - mp_stat['cpid']['pid'] = list() - - for key in ProcDict.key_list: - mp_stat['cpid'][key]=list() - - for pid in mp_stat['cp'].keys(): - mp_stat['cpid']['pid'] += [pid,] * mp_stat['cp'][pid].nbr_rows - for key in ProcDict.key_list: - mp_stat['cpid'][key] += mp_stat['cp'][pid][key] - del mp_stat['cp'] - - -def print_summary(): - print ("===== MOTHER PROCESS SUMMARY =====") - for (k, v) in mp_stat['mp_summary'].items(): - print ("mp_summary['%s']=%s " % (k, v)) - - print ("===== CHILDREN PROCESS SUMMARY ===") - for (k, v) in mp_stat['cp_summary'].items(): - print ("cp_summary['%s']=%s " % (k, v)) - - -################## children tools ###################### -def children_working(ppid): - """ ps statistics for children of ppid. returns False if no children exist """ - out = subprocess.getoutput("ps --ppid %i -o pid,state,vsize,rss,sz,start,cputime,etime" % ppid) - ps_lines = out.splitlines() - ps_lines.pop(0) - - if len(ps_lines) > 0: - for line in ps_lines: - ps_str = line.split() - pid = int(ps_str[0]) - if pid in mp_stat["cpid"].keys(): - mp_stat["cpid"][pid].add_ps_line(line) - #print ("child_stat.appended for kid: %i" % pid ) - return True #ps returns something -> children still exist - else: - print (" mpMonTools.children_working: no children exist for parent: %i" % ppid) - return False #ps returns nothing -> children either weren't born or died. - return False - -def summarize_children_stat(): - """trim finished worker information at the end of statistics """ - for pid in mp_stat["cpid"].keys(): - mp_stat['cpid'][pid].trim_stat() - mp_stat['cpid'][pid].summarize() - -def summarize_proc_stat(): - """trim finished worker information at the end of statistics """ - mp_stat['mpid'].summarize() - for pid in mp_stat['cpid'].keys(): - mp_stat['cpid'][pid].trim_stat() - mp_stat['cpid'][pid].summarize() - -def children_born(log, mpid, np): - """ figures out weather the np kids were spawned for mother mpid""" - sc,out = subprocess.getstatusoutput("ps --ppid %i -o pid,start" % mpid) - if sc is not 0: - print (" mpMonTools.children_born: no kids yet... Error, sc=%i" % sc) - return False - - ps_lines = out.splitlines() - #print ("ps_lines=", ps_lines) - nc = len(ps_lines)-1 - - print (" children_exist: nbr of children = [%i]" % nc) - if grepValue(log, "FIRSTEVENT_ELAP_TIME") is None: - return False - else: - pass - - if nc>=np : #nbr of children is equal to nbr of procs required - print ("%i children workers forked! Registering them (creating ProcDicts) ..." % np) - ps_lines.pop(0) - for line in ps_lines: - ps_str = line.split() - pid = int(ps_str[0]) - print ("child [%i] born" % pid, ) - if grepExist(log, "%i-%i" % (mpid, pid)): - ProcDict(pid, start_time = _seconds(ps_str[1])) - print ("..... 
child WORKER [%i] added" % pid) - return True - else: - print ("no children exist for parent: %s " % mpid) - return False - - -################ LOG VARIABLES PROCESSING: Grepping ################## -def grepExist(log, field): - """grep check for the existance of the unique field in the log - """ - #print ("grepping %s in %s" % (field, log)) - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc==256: - print ("grepExist: FALSE: grep %s %s failed with sc=%i" % (field, log, sc)) - return False - line = out.splitlines()[0] - print ("grepExist: TRUE: sc=%i grepped-line=%s" % (sc,line)) - return True - -def grepValue(log, field, sep='='): - """grep unique field in the log and find corresponding value by regexp - Example: out = 'Py:EventLoopMgr INFO EvtMax = 123456 something' - grepValue(log, "EvtMax", sep="=") = '123456' - """ - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc!=0: - print ("grepping %s in %s failed" % (field, log)) - return None - line = out.splitlines()[0] - print ("grepped-line=%s" % line) - import re - vexpr = '\\s*'+ sep+ '\\s*(\\d*\\.?\\d+)' #vexpr = '\\s*'+ sep+ '\\s*(\\d+)' - m = re.search( field + vexpr, line) - value = m.group(1) - print ("grepValue:[%s], line=%s" % (value,line)) - return value - -def grepValueList(log, search_str, field ='', sep='='): - """grep unique field in the log and find corresponding value by regexp - Example: out = 'Py:EventLoopMgr INFO EvtMax = 123456 something' - grepValue(log, "EvtMax", sep="=") = '123456' - """ - sc,out = subprocess.getstatusoutput( "grep %s %s" % (search_str, log)) - if sc!=0: - print ("grepping %s in %s failed with sc=%s" % (search_str, log, sc), "out=%s" % out) - return [] - - if field =='': - field = search_str - - value_list = [] - - #print ("grepped lines = %s" % out) - - import re - vexpr = '\\s*'+ sep+ '\\s*(\\d*\\.?\\d+)' - for line in out.splitlines(): - print ("grepped-line=%s" % line) - m = re.search( field + vexpr, line) - value = m.group(1) - print ("grepValue:[%s], line=%s" % (value,line)) - value_list.append(value) - return value_list - -def grepPath(log, field, sep=':'): - """grep unique field in the log and find corresponding value by regexp - Example: out = 'Py:EventLoopMgr INFO master workdir: /tmp/athena-mp-tmp-tmmous/22590-1261097934 smthng' - grepPath(log, "workdir", sep=":") = '/tmp/athena-mp-tmp-tmmous/22590-1261097934' - """ - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc!=0: - print ("grepping %s in %s failed" % (field, log)) - return None - line = out.splitlines()[0] - import re - vexpr = '\\s*'+ sep+ '\\s*([^\\s]+)' - m = re.search( field + vexpr, line) - path = m.group(1) - return path - - -def grepWorkerPathes(log, field = "WORKER_STDOUT", sep=':'): - sc,out = subprocess.getstatusoutput( "grep %s %s" % (field, log)) - if sc!=0: - print ("grepping %s in %s failed" % (field, log)) - return None - - workers = dict() - - #pids = grepValueList(log, "WORKER_PID") - #if len(pids)==0: - # return [] - - import re - vexpr = '\\s*'+ sep+ '\\s*([^\\s]+)' - - for line in out.splitlines(): - rout = re.search( field + vexpr, line) - rpid = re.search( 'WORKER_PID' + '\\s*'+ '=' + '\\s*([^\\s]+)', line) - path = rout.group(1) - pid = rpid.group(1) - - workers[pid] =path - - return workers - -def grepWorkerStat(log, search_str = "WORKER_EVENT_STAT", fields=['elap_time',], sep='='): - sc,out = subprocess.getstatusoutput( "grep %s %s" % (search_str, log)) - if sc!=0: - print ("grepping %s in %s failed" % (search_str, log)) - return None - 
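The grep-based extractors in this block fork a shell per lookup; the same 'field <sep> value' convention can be matched in-process with the regexp the module already uses. A purely illustrative sketch, not part of the original code:

import re

def grep_value_py(path, field, sep='='):
    """Return the first numeric value following 'field <sep>' in the file, else None."""
    pattern = re.compile(re.escape(field) + r'\s*' + re.escape(sep) + r'\s*(\d*\.?\d+)')
    with open(path) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                return m.group(1)
    return None

# grep_value_py("mp.output/stdout.mp.jobo.4.100", "WORKER_ELAP_TIME")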
- worker_stat = dict() - for field in fields: - worker_stat[field]=list() - - #pids = grepValueList(log, "WORKER_PID") - #if len(pids)==0: - # return [] - - import re - vexpr = '\\s*'+ sep+ '\\s*([^\\s]+)' - - for line in out.splitlines(): - for field in fields: - rfield = re.search( field + vexpr, line) - value = rfield.group(1) - worker_stat[field].append( value ) - - return worker_stat - -def extractWorkersStat(mlog): - """extract event based statistics of the WORKER using methods implemented above""" - paths_dict = grepWorkerPathes(mlog) - worker_stat = dict() - for pid,path in six.iteritems(paths_dict): - worker_stat[pid] = grepWorkerStat(path, fields=['evt', 'cpu', 'elap_time', 'elap_os_time', 'system_time', 'user_time']) - return worker_stat - -def writeOutWorkersStat(mlog): - ws = extractWorkersStat(mlog) - import os - ppid = grepValue(mlog, "PARENT_PID") - dir = "stat-%s" % ppid - - if os.path.exists(dir): - import shutil - shutil.rmtree(dir) - - os.mkdir("stat-%s" % ppid ) - os.chdir("stat-%s" % ppid ) - - for pid,stat in six.iteritems(ws): - rows = list() - for i in range( 1 + len(stat['evt'])): - rows.append(''); - - for field,values in six.iteritems(stat): - rows[0] = "%s\t%s" % (rows[0], field) - i=1 - for value in values: - rows[i] = "%s\t%s" % (rows[i], value) - i+=1 - f_name = "WORKER_%s_STAT.txt" % pid - f = open(f_name, 'w') - for row in rows: - f.write(row + "\n") - - del rows - f.close() - print (" worker-stat file ./%s/%s created." % (dir, f_name) ) - - os.chdir("..") - - -############# related to athena-mp ######################### -def launch_athenaMP2(cmd, job, np, ne): - """"launching cmd: athena.py --nprocs=$np -c EvtMax=$ne $jobo 1> mp.output/stdout_$jobo.$np.$ne 2> mp.output/stderr_$jobo.$np.$ne""" - - print ("job command and options as template: %s" % cmd) - from string import Template - arg_template= Template(cmd) - arg_str = arg_template.substitute(MAXEVT=np*ne, NPROCS=np, JOBO=job) - proc_args = arg_str.split(); - - output_dir = "mp.output" - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - - stdout_name = os.path.join(output_dir, "stdout.mp.%s.%i.%i" % (job,np,ne)) - stderr_name = os.path.join(output_dir, "stderr.mp.%s.%i.%i" % (job,np,ne)) - - if not os.path.exists(job): - print ("job options file %s doesn't exist" % job) - return None - import shutil - shutil.copy(job, output_dir) - - - STDOUT_FILE = open(stdout_name , "w") - STDERR_FILE = open(stderr_name, "w") - - print ("<<<LAUNCH>>>: %s" % proc_args) - mproc = subprocess.Popen( proc_args, - stdout=STDOUT_FILE, - stderr=STDERR_FILE, - cwd = output_dir, - shell=False, - close_fds = True) - - pid_list.append(mproc.pid) - STDOUT_FILE.close(); STDERR_FILE.close() - return mproc - -def launch_athenaMP(jobo, np, ne): - """"launching cmd: athena.py --nprocs=$np -c EvtMax=$ne $jobo 1> mp.output/stdout_$jobo.$np.$ne 2> mp.output/stderr_$jobo.$np.$ne""" - - output_dir = "mp.output" - stdout_name = os.path.join(output_dir, "stdout.mp.%s.%i.%i" % (jobo,np,ne)) - stderr_name = os.path.join(output_dir, "stderr.mp.%s.%i.%i" % (jobo,np,ne)) - print ("launching: athena.py --nprocs=%i -c EvtMax=%i %s \ - 1> %s 2> %s" % (np, np*ne, jobo, stdout_name, stderr_name)) - - if not os.path.exists(jobo): - print ("job options file doesn't exist") - return None - - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - - STDOUT_FILE = open(stdout_name , "w") - STDERR_FILE = open(stderr_name, "w") - mproc = subprocess.Popen(["athena.py", "--nprocs=%i" % np, "-c", "EvtMax=%i" % (np*ne) , "../%s" % jobo], - 
executable='athena.py', - stdout=STDOUT_FILE, - stderr=STDERR_FILE, - cwd = output_dir, - shell=False, - close_fds = True) - pid_list.append(mproc.pid) - STDOUT_FILE.close(); STDERR_FILE.close() - return mproc - -############# TERMINATOR AREA: stopping, killing, terminating processes ############### - -def stop_proc(proc): - """ terminate/kill a process by either proc_object or pid""" - pid = 0 - try: - if type(proc) is int: - pid = proc - os.kill(pid, signal.SIGKILL); #os.waitpid(pid, 0); - else: - pid = proc.pid - if proc.poll() is None: os.kill(pid, signal.SIGKILL); - proc.wait(); - print ("process %s terminated" % pid ) - except Exception as e: - print ("## Caught exception [%s] !!" % str(e.__class__)," ## What:",e) - print (sys.exc_info()[0], sys.exc_info()[1]) - return False - pid_list.remove(pid) - return True - -def stop_proc_tree(pid): - """ Terminate/kill recursively process tree by pid. Be precautious using this!""" - out = subprocess.getoutput("ps --ppid %i" % pid) - lines = out.splitlines(); lines.pop(0) #remove header - try: - if len(lines) > 0: - for line in lines: - cpid = int(line.split()[0]) - print ("child [%i:%i] being terminated..." % (pid, cpid)) - stop_proc_tree(cpid) - if pid in pid_list: pid_list.remove(pid) - os.kill(pid, signal.SIGKILL); #os.waitpid(pid, 0); - print ("[%i] - terminated." % pid ) - except Exception as e: - print ("[%i] - dead #while killing caught exception [%s] !!" % (pid, str(e.__class__))," ## What:",e) - #print (sys.exc_info()[0], sys.exc_info()[1]) - return False - return True - - -def stop_athenaMP(mproc): - """ terminate/kill a process by either proc_object or pid""" - try: - pid = 0 - if type(mproc) is int: - pid = mproc - stop_proc_tree(pid) # killing athena-mp mproc-tree - else: - pid = mproc.pid; - if mproc.poll() is None: os.kill(pid, signal.SIGKILL); - mproc.wait(); - print ("process %s terminated" % pid ) - return True - except Exception as e: - print ("## Caught exception [%s] !!" % str(e.__class__)," ## What:",e) - print (sys.exc_info()[0], sys.exc_info()[1]) - return False - return False - - - - -############# sar related wrappers ########################### - -def launch_sar(log, time_step): - """ - launch sar with - `sar -bBcdqrRuvwWy -I SUM -I XALL -n ALL -P ALL` = `sar -A` - """ - sar_args = [ "sar", "-bBrvwu", "-o", log, "%i"% time_step, "0" ] - print ("launching: %s %s %s %s %s %s" % tuple(sar_args) ) - sc,out = subprocess.getstatusoutput('sar -b 1 1') - if sc!=0: - print ('launching failed - sar do not work on this system - please install if available!') - return None - FNULL = open('/dev/null', 'w') - proc = subprocess.Popen(sar_args, - executable="sar", - stdout = FNULL, - stderr = subprocess.STDOUT, - shell=False, - close_fds = True - ) - - FNULL.close() - print ("sc=%i" % sc) - print ("out=%s" % out) - - pid_list.append(proc.pid) - return proc - -def _num(str): - """try converting str into int or float if fails return the same string""" - try: - if "." in str: - out = float(str) - else: - out = int(str) - except ValueError: - return str - return out - -def get_sar_stat(log, key): - """ get statistics by issueing this cmd: `sar -key $log`""" - print ('launching cmd: sar %s -f %s' % (key, log) ) - sc,out = subprocess.getstatusoutput("sar %s -f %s" % (key,log) ) - if sc!=0: - print ("launching failed - either file %s does not exist or sar does not work on this system - please check!" 
-def get_sar_stat(log, key):
-    """ get statistics by issuing the cmd: `sar $key -f $log`"""
-    print ('launching cmd: sar %s -f %s' % (key, log) )
-    sc,out = subprocess.getstatusoutput("sar %s -f %s" % (key,log) )
-    if sc!=0:
-        print ("launching failed - either file %s does not exist or sar does not work on this system - please check!" % log)
-        return None
-    sar_dict = dict()
-
-    lines = out.splitlines()
-    print ("trim1=", lines.pop(0)) #trimming output
-    print ("trim2=", lines.pop(0)) #trimming output
-
-    avg_line = lines.pop() #trimming avg line at the end
-    print ("avg_line1=", avg_line)
-
-    hstrs = lines.pop(0).replace('%', 'p').replace('/', 'p').split() #trimming header strings and replacing '%' and '/' to satisfy ROOT
-    hstrs[0] = "Time"
-    print ("Sar statistics fields found: ", hstrs)
-
-    for hstr in hstrs:
-        sar_dict[hstr] = list()
-    for line in lines:
-        lstrs = line.split()
-        print ("lstrs=", lstrs)
-        for i,hstr in enumerate(hstrs):
-            if i!=0:
-                sar_dict[hstr].append( _num(lstrs[i]) )
-            else:
-                sar_dict[hstr].append(_seconds(lstrs[i])) #time conversion from "H:M:S" to number of seconds
-    return sar_dict
-
-
-### Tools for process statistics #######
-
-def get_full_sar_stat(log):
-    mp_stat["io"]  = get_sar_stat(log, "-b")
-    mp_stat["mem"] = get_sar_stat(log, "-r")
-    mp_stat["cpu"] = get_sar_stat(log, "-u")
-
-
-############## sysstat and other linux command wrappers ########
-
-def _meminfo():
-    out=subprocess.getoutput("cat /proc/meminfo")
-    lines = out.splitlines()
-    mem=dict()
-    Kb = 1024
-    mem['total']  = int(lines[0].split()[1]) / Kb
-    mem['free']   = int(lines[1].split()[1]) / Kb
-    mem['buffers']= int(lines[2].split()[1]) / Kb
-    mem['cached'] = int(lines[3].split()[1]) / Kb
-    print ("meminfo.real_total: [%i Mb]" % mem['total'] )
-    print ("meminfo.free: [%i Mb]" % mem['free'])
-    print ("meminfo.cached: [%i Mb]" % mem['cached'] )
-    print ("meminfo.buffers: [%i Mb]" % mem['buffers'])
-    return mem
-
-def _get_iostat():
-    out=subprocess.getoutput("iostat")
-    io = dict()
-    lines = out.splitlines()
-    strs = lines[1].split()
-    io['used'] = int(strs[2])
-    mem = dict()
-    mem['free']    = int(strs[3])
-    mem['cached']  = int(strs[5])
-    mem['buffers'] = int(strs[6])
-    mem_strs = lines[2].split()
-    mem['USED'] = int(mem_strs[2])
-    mem['FREE'] = int(mem_strs[3])
-    return io
-
-def _used_mem():
-    out=subprocess.getoutput("free -m")
-    mem_strs = out.splitlines()[2].split()
-    used_mem = int(mem_strs[2])
-    print ("used_mem: [%i Mb]" % used_mem)
-    return used_mem
-
-def _free_mem():
-    out=subprocess.getoutput("free -m")
-    mem_strs = out.splitlines()[2].split()
-    free_mem = int(mem_strs[3])
-    print ("free_mem: [%i Mb]" % free_mem)
-    return free_mem
-
-def _launch_iostat(log, time_step):
-    print ('launching cmd: iostat $TIME_STEP -d -x > iostat.$jobo.$np.$ne &')
-    sc,out = subprocess.getstatusoutput( "iostat" )
-    if sc!=0:
-        print ('launching failed - iostat does not work on this system')
-        return None
-    f_iostat = open(log, "w")
-    iostat_proc = subprocess.Popen(
-        [ "iostat",  "%i" % time_step, "-d", "-x"],
-        executable="iostat",
-        stdout = f_iostat,
-        shell=False,
-        close_fds = True)
-
-    f_iostat.close()
-    return iostat_proc
-
-def _launch_vmstat(log, time_step):
-    print ('launching cmd: vmstat $TIME_STEP -n > vmstat.$jobo.$np.$ne &' )
-    sc,out = subprocess.getstatusoutput( "vmstat -V" )
-    if sc!=0:
-        print ('launching failed - vmstat does not work on this system')
-        return None
-    f_vmstat = open(log, "w")
-    proc = subprocess.Popen([ "vmstat", "%i" % time_step, "-n" ],
-                            executable="vmstat",
-                            stdout = f_vmstat,
-                            shell=False,
-                            close_fds = True)
-    f_vmstat.close()
-    return proc
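# A keyed variant of the _meminfo() parser above: looking fields up by name
# instead of by line index is robust against kernels that order or extend
# /proc/meminfo differently. A sketch, not part of the original module.
def meminfo_by_key(path="/proc/meminfo"):
    mem = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            mem[parts[0].rstrip(':')] = int(parts[1]) // 1024  # kB -> Mb
    return mem

# e.g. meminfo_by_key()['MemFree'] -> free memory in Mb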
-def __create_childProcDicts(ppid):
-    """ create a ProcDict stats dictionary for each child of the given parent pid"""
-    out = subprocess.getoutput("ps --ppid %i -o pid,start" % ppid)
-    ps_lines = out.splitlines()
-    ps_lines.pop(0) #remove header
-
-    if len(ps_lines) > 0:
-        for line in ps_lines:
-            ps_str = line.split()
-            pid = int(ps_str[0])
-            ProcDict(pid, start_time = _seconds(ps_str[1]))
-            print ("ppid: [%i]: child [%i] added" % (ppid, pid))
-    else:
-        print ("no children exist for parent: %s " % ppid)
-
-
-####### adopted from AthenaMP/PyComps ###################
-def print_shared_private(pid):
-    print ("CPROC-SHARED_PRIVATE_MEM for pid: [%i]" % pid)
-    for line in open("/proc/%i/status" % pid):
-        if line.startswith('Vm'):
-            print(line.strip())
-    private,shared=_get_shared_private_mem(pid)
-    print ("pid:[%i] ===> private: %s MB | shared: %s MB" % (pid, private/1024., shared /1024.))
-
-def _get_shared_private_mem(pid='self'):
-    """ Find the process's shared and private memory size from /proc/<pid>/statm and /proc/<pid>/smaps.
-    Copied from AthenaMP/PyComps.py"""
-    global have_pss
-    private_lines=[]
-    shared_lines=[]
-    pss_lines=[]
-    statm_name = "/proc/%s/statm" % pid
-    smaps_name = "/proc/%s/smaps" % pid
-    rss=int(open(statm_name).readline().split()[1])*PAGESIZE
-    if os.path.exists(smaps_name):
-        for line in open(smaps_name).readlines():
-            if line.startswith("Shared"):
-                shared_lines.append(line)
-            elif line.startswith("Private"):
-                private_lines.append(line)
-            elif line.startswith("Pss"):
-                have_pss=1
-                pss_lines.append(line)
-        shared=sum([int(line.split()[1]) for line in shared_lines])
-        private=sum([int(line.split()[1]) for line in private_lines])
-        #Note: shared + private = rss above
-        #The rss in smaps includes video card mem etc.
-        if have_pss:
-            pss_adjust=0.5 #add 0.5 KiB - the average error due to truncation
-            Pss=sum([float(line.split()[1])+pss_adjust for line in pss_lines])
-            shared = Pss - private
-        #elif (2,6,1) <= kv <= (2,6,9):
-        #    shared=0 #lots of overestimation, but what can we do?
-        #    private = rss
-    else:
-        shared=int(open(statm_name).readline().split()[2])
-        shared*=PAGESIZE
-        private = rss - shared
-    return (private, shared)
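# The Pss bookkeeping above, reduced to one helper: summing the "Pss:" lines
# of /proc/<pid>/smaps gives the proportional set size - private memory plus
# this process's share of every shared mapping. A sketch that assumes a
# kernel exposing Pss (2.6.25 or newer); values are in kB.
def pss_kb(pid='self'):
    total = 0.0
    with open("/proc/%s/smaps" % pid) as f:
        for line in f:
            if line.startswith("Pss:"):
                total += float(line.split()[1]) + 0.5  # +0.5 kB truncation correction
    return total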
-#################################################
-############# ROOT Output #######################
-
-def _createRootFile(outName):
-    """create the skeleton of the report ROOT file"""
-    print ("create ROOT file...")
-    from PerfMonAna.PyRootLib import importRoot
-    from ROOT import TTree
-    import array
-    ROOT = importRoot( batch = True )
-    outFile = ROOT.fopen( outName, 'RECREATE' )
-    outFile.cd("/")
-
-    i = array.array( 'i', [0] )
-    d = array.array( 'f', [0.] )
-
-    outFile.cd()
-    tree =  TTree( "io", "IO statistics tree")
-    tree.Branch('Time', i, 'Time/I')
-    tree.Branch('tps', d, 'tps/F')          #transfers per second
-    tree.Branch('rtps', d, 'rtps/F')        #read transfers per second
-    tree.Branch('wtps', d, 'wtps/F')        #write transfers per second
-    tree.Branch('breadps', d, 'breadps/F')  #blocks read per second
-    tree.Branch('bwrtnps', d, 'bwrtnps/F')  #blocks written per second
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "mem", "Mem statistics tree")
-    tree.Branch('Time', i, 'Time/I')
-    tree.Branch('kbmemfree', i, 'kbmemfree/I')  #free in kB
-    tree.Branch('kbmemused', i, 'kbmemused/I')  #used in kB
-    tree.Branch('pmemused', d, 'pmemused/F')    #used, in percent
-    tree.Branch('kbbuffers', i, 'kbbuffers/I')  #buffers in kB
-    tree.Branch('kbcached', i, 'kbcached/I')    #cached in kB
-    tree.Branch('kbswpfree', i, 'kbswpfree/I')  #swap free in kB
-    tree.Branch('kbswpused', i, 'kbswpused/I')  #swap used in kB
-    tree.Branch('pswpused', d, 'pswpused/F')
-    tree.Branch('kbswpcad', i, 'kbswpcad/I')
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree("cpu", "CPU statistics tree")
-    tree.Branch('Time', i, 'Time/I')
-    tree.Branch('CPU', i, 'CPU/I')           #CPU number or ALL
-    tree.Branch('puser', d, 'puser/F')       # CPU utilization percentage at user level
-    tree.Branch('pnice', d, 'pnice/F')       # CPU utilization at nice level
-    tree.Branch('psystem', d, 'psystem/F')   # CPU utilization at system level
-    tree.Branch('piowait', d, 'piowait/F')   # CPU idle percentage due to IO-wait
-    tree.Branch('psteal', d, 'psteal/F')     # virtual processes wait percentage
-    tree.Branch('pidle', d, 'pidle/F')       # CPU idling due to non-IO reasons
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "cp", "Children processes statistics")
-    tree.Branch('Time', i, 'Time/I')        # start date/time
-    tree.Branch('pid', i, 'pid/I')          # pid of worker process
-    tree.Branch('state', i, 'state/I')      # state of the process at the moment (important at the end of process)
-    tree.Branch('vmem', i, 'vmem/I')
-    tree.Branch('rss', i, 'rss/I')
-    tree.Branch('sz', i, 'sz/I')            # size in physical pages of the core image of the process, including text, data, and stack space
-    tree.Branch('shared', i, 'shared/I')    # shared memory as extracted from /proc/<pid>/smaps
-    tree.Branch('private', i, 'private/I')  # private memory as extracted from /proc/<pid>/smaps
-    tree.Branch('cpu_time', i, 'cpu_time/I')    # cpu time
-    tree.Branch('elap_time', i, 'elap_time/I')  # elapsed time (wall clock time)
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "cp_summary", "children processes summary tree")
-    tree.Branch('pid', i, 'pid/I')
-    tree.Branch('elap_time', i, 'elap_time/I')
-    tree.Branch('cpu_time', i, 'cpu_time/I')
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "mpid", "Mother Process statistics tree")
-    tree.Branch('pid', i, 'pid/I')
-    tree.Branch('state', i, 'state/I')
-    tree.Branch('vmem', i, 'vmem/I')
-    tree.Branch('rss', i, 'rss/I')
-    tree.Branch('sz', i, 'sz/I')
-    tree.Branch('shared', i, 'shared/I')
-    tree.Branch('private', i, 'private/I')
-    tree.Branch('cpu_time', i, 'cpu_time/I')
-    tree.Branch('elap_time', i, 'elap_time/I')
-    tree.Write()
-
-    outFile.cd()
-    tree = TTree( "mp_summary", "Mother process summary tree")
-    tree.Branch('pid', i, 'pid/I')
-    tree.Branch('m_elap_time', i, 'm_elap_time/I')  #mother process total elapsed time
-    tree.Branch('m_cpu_time', i, 'm_cpu_time/I')    #mother process CPU-time (children-workers' time not included)
-    tree.Branch('m_firstevent_time', i, 'm_firstevent_time/I')  #time spent on first events (after init, before forking) - measured inside mother process
-    tree.Branch('m_par_time', i, 'm_par_time/I')    #parallel time - from forking till collecting data - measured inside mother process
-    tree.Branch('x_init_time', i, 'x_init_time/I')  #externally observed MP SERIAL-INIT execution time (from start till forking)
-    tree.Branch('x_par_time', i, 'x_par_time/I')    #externally observed MP PARALLEL execution time (from forking till joining)
-    tree.Branch('x_fin_time', i, 'x_fin_time/I')    #externally observed MP FINALIZATION time (from joining till end of job)
-    tree.Branch('free_mem_spike', i, 'free_mem_spike/I')  #spike in "free+buffers+cached" at the end of the MP process -> physical memory released
-    tree.Branch('swap_change', i, 'swap_change/I')  #change in swap; should be zero, otherwise MP caused swapping
-    #tree.Branch('used_mem_spike', i, 'used_mem_spike/I')
-    tree.Branch('event_proc_rate', d, 'event_proc_rate/F')  #event processing rate per worker, in events/process/min
-    tree.Write()
-
-    outFile.cd()
-    outFile.Write()
-    outFile.Close()
-    print ("create ROOT file... [DONE]" )
-    return
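# The leaflist convention used by _createRootFile() above, in isolation: the
# third argument of TTree.Branch is "<leafname>/<type>", where /I is a 32-bit
# int matching array('i') and /F a 32-bit float matching array('f'). A
# minimal sketch assuming a PyROOT installation; names are illustrative.
import array
from ROOT import TFile, TTree

def make_demo_tree(fname="demo.root"):
    out = TFile(fname, "RECREATE")
    tree = TTree("demo", "leaflist demo")
    n = array.array('i', [0])
    x = array.array('f', [0.])
    tree.Branch('n', n, 'n/I')  # int buffer <-> /I leaf
    tree.Branch('x', x, 'x/F')  # float buffer <-> /F leaf
    for j in range(3):
        n[0] = j
        x[0] = 0.5 * j
        tree.Fill()  # snapshots the current buffer contents
    tree.Write()
    out.Close()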
-def createRootFile(outName, np):
-    """create the structure of the ROOT report file from the mp_stat dictionary"""
-    print ("create ROOT file...")
-
-    from PerfMonAna.PyRootLib import importRoot
-    from ROOT import TTree
-    import array
-    ROOT = importRoot( batch = True )
-    outFile = ROOT.fopen( outName, 'RECREATE' )
-    outFile.cd("/")
-
-    i = array.array( 'i', [0] )
-    d = array.array( 'f', [0.] )
-
-    for t in mp_stat.keys():
-        tree =  TTree( t, "%s stat tree" % t)
-        tree.Branch('np', i, 'np/I')  # each tree gets an 'np' branch
-        for b in mp_stat[t].keys():
-            if isinstance(mp_stat[t][b][0], int):
-                tree.Branch(b, i, '%s/I' % b)
-            elif isinstance(mp_stat[t][b][0], float):
-                tree.Branch(b, d, '%s/F' % b)
-            else:
-                #branch values that are neither int nor float are stored as int
-                tree.Branch(b, i, '%s/I' % b)
-        tree.Write()
-    outFile.Write()
-    outFile.Close()
-    print ("create ROOT file... [DONE]")
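# Shape of the mp_stat dictionary that createRootFile() above and
# fillRootTree() below traverse: one entry per tree, each mapping a branch
# name to a column of values of equal length. The values are illustrative only.
mp_stat_example = {
    "cpu": {"Time": [36000, 36010], "puser": [55.2, 60.1]},
    "mem": {"Time": [36000, 36010], "kbmemfree": [812340, 640120]},
}
# createRootFile() makes one TTree per outer key with one branch per inner
# key; fillRootTree() then writes the columns row by row via SetBranchAddress.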
[DONE]") - - -def fillRootTree(tree, stat, np): - #print ("writing %s statistics Tree:" % tree.GetName(), end='') - branches = stat.keys() - #print (" branches=", branches, "...", end='') - nbr = len(branches) - array_list = list() - - np_array = array.array('i', [np]) - tree.SetBranchAddress('np', np_array) #putting 'np' into each tree. - for branch in branches: - if isinstance(stat[branch][0], float): - f = stat[branch][0] - nums = array.array('f', [0.0]) - array_list.append(nums) - elif isinstance(stat[branch][0], int): - i = stat[branch][0] - nums = array.array('i', [0]) - array_list.append(nums) - else: - #print ("branch [%s] is not int or float type" % branch) - nums = array.array('i', [-1]) - array_list.append(nums) - tree.SetBranchAddress(branch, array_list[-1]); - - for index in range(len(stat[branches[0]])): - for array_index, branch in enumerate(branches): - array_list[array_index][0] = stat[branch][index] if array_list[array_index][0] is not -1 else -1 - tree.Fill() - #print ("[DONE]") - - - -def writeRootFile(outName, np): - """writes statistics into ROOT file""" - print ("write ROOT file %s...", outName ) - createRootFile(outName, np) - from ROOT import TFile, TTree - import array - outFile = TFile( outName, 'update' ) - - stat_keys = mp_stat.keys() - #print ("mp_stat.keys()", stat_keys) - for key in stat_keys: - #print (" writing [%s]" % key) - tree = outFile.Get( "%s" % key ) - fillRootTree(tree, mp_stat[key], np) - tree.Write() - - outFile.Write() - outFile.Close() - print ("write ROOT file... [DONE]" ) - return - -def mergeRootFiles(file, ne): - import glob - file_list = glob.glob1(os.getcwd(), "%s.*.%i.root" % (file, ne) ) - cmd = "hadd -f6 mp_stat.%s.ne%i" % (file, ne) - for f in file_list: - cmd = cmd + ' ' + f - sc, out = subprocess.getstatusoutput(cmd) - -def mergeRootOutput(output_file, jobo, np_list, ne): - from ROOT import TFile, TTree - #output_file = "merged.%s.ne%i.root" % (jobo, ne) - cmd = "hadd -f6 %s" % output_file - for np in np_list: - # here we copy mp_summary and cp_summary trees in each root file from /$np dir into root dir for further merging - file = "mp.%s.%i.%i.root" % (jobo, np, ne) - print (" ---> processing file = %s" % file) - #here we form the command for merging - cmd = cmd + " %s" % file - - print ("issuing root files merging command:[%s]" % cmd) - sc, out = subprocess.getstatusoutput(cmd) - return #output_file - -def _createGlobalRootFile(file, ne): - from ROOT import TFile, TTree, Tlist - import glob - file_list = glob.glob1(os.getcwd(), "%s.*.%i.root" % (file, ne) ) - outFile = TFile ("%s.%i.root" % (file, ne), 'RECREATE' ) - for f in file_list: - print ("Copying trees from [%s]" % f) - tf = TFile (f, 'READ' ) - mpt = tf.Get("mp_summary") - cpt = tf.Get("cp_summary") - outFile.cd('/') - dir = "%s" % f.replace(file, "").split(".")[1] - print (" creating dir for np = %s" % dir) - outFile.mkdir(dir) # creating dir for np - outFile.cd(dir) - mpTree = mpt.CloneTree(); mpTree.Write() - cpTree = cpt.CloneTree(); cpTree.Write() - outFile.Write() - tf.Close() - - outFile.cd('/') - - ikeys = outFile.GetListOfKeys().MakeIterator() - key = ikeys.Next() - key_list = list() - - while key is not None: - key_list.append(key.GetName()) - - for np in key_list: - outFile.Get("%s/mp_summary") - - -def report2(root_file, ne = 0, comments=""): - print(' mpMonTools.report(): root_file=', root_file) - from ROOT import TFile, TTree, TBranch, TCanvas, TPad, TGraph, TLegend, TMultiGraph, gStyle, TLatex, TPaveLabel, TPaveText, TH2I, TMath - - def getTreeList(tree, 
column, condition): - size = tree.Draw(column, condition,'goff'); - v1 = tree.GetV1(); v1.SetSize(size) - return list(v1) - - def makeGraph(tree, name, formula, condition="", color = 1, lineWidth=1): - tree.Draw(formula, condition, "goff") - graph = TGraph(int(tree.GetSelectedRows()), tree.GetV2(), tree.GetV1()) - graph.SetLineColor(color); - graph.SetLineWidth(lineWidth) - graph.SetName(name); - return graph - - def MakeMultiGraph(graph_data, mg, l): - clr = 1 - gl = list() - - if graph_data is None: - return [] - - if graph_data['type'] is 'graph': - for name, (tree, param) in graph_data['data'].items(): - clr+=1; formula =''; condition='' - if type(param).__name__=='tuple': - formula = param[0] - condition = param[1] - else: - print ("MakeMG: ", formula, condition) - formula = param - condition = "" - - print ("name=%s, tree=%s, formula=%s, condition=%s" % (name, tree.GetName(), formula, condition) ) - - #g = makeGraph(tree, name, formula, condition, color=clr) - tree.Draw(formula, condition, "goff") - g = TGraph(int(tree.GetSelectedRows()), tree.GetV2(), tree.GetV1()); gl.append(g) - - g.SetName(name); g.SetLineColor(clr); g.SetMarkerColor(clr); g.SetLineWidth(0) - if name == "worker_rate": - g.SetLineColor(10) - - mg.Add(g); - l.AddEntry(g, name) - - if graph_data['type'] is 'list': - for name, (lx,ly) in graph_data['data'].items(): - print ("name=%s" % name); print (lx); print (ly) - clr+=1 - g = TGraph( len(lx), array.array('f', lx), array.array('f', ly) ) - g.SetName(name); g.SetLineColor(clr); g.SetLineWidth(1); g.SetMarkerColor(clr); - mg.Add(g) - l.AddEntry(g, name) - gl.append(g) - - if graph_data['type'] is 'array': - clr = 1 - g_list = list() - data = graph_data['data'] - for name,(x,y) in graph_data['data'].items(): - print (x); print (y) - clr+=1; - g = TGraph(len(x), x, y) - g.SetName(name); g.SetLineColor(clr); g.SetLineWidth(1); g.SetMarkerColor(clr) - gl.append(g) - mg.Add(g); - l.AddEntry(g, name) - if graph_data['type'] is 'text': - title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC") - for s in graph_data['data']: - print ("graph_data['data']=%s" % s) - mp_pt.AddText(s) - mp_pt.SetTextAlign(12); - mp_pt.SetTextSize(0.04) - mp_pt.Draw() - return [] - - if graph_data['type'] is 'latex': - title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC") - tl = TLatex(); tl.SetTextSize(0.02); tl.SetTextAlign(12); - txtd = graph_data['data'] - i = 0; x0 = 0.05; y0 = 0.90; dx = 0.08; dy = 0.05 - x1 = x0 - tl.DrawLatex(x1, y0, 'np') - for s in txtd['np']: - x1 = x1 + dx - tl.DrawLatex(x1, y0, s) - txtd.pop('np') - - for k in txtd.keys(): - y0 = y0 - dy - tl.DrawLatex(x0-0.03, y0, k); - x1 = x0 - for s in txtd[k]: - x1 = x1 + dx - tl.DrawLatex(x1, y0, s) - return [] - if 'goptions' in graph_data.keys(): - mg.Draw(graph_data['goptions']) - else: - mg.Draw('ALP') - - l.Draw() - h=mg.GetHistogram(); h.SetXTitle(graph_data['xtitle']); h.SetYTitle(graph_data['ytitle']); h.SetMinimum(0.1); - title.DrawPaveLabel(0.1,0.91,0.9,0.99, graph_data['title'], "brNDC") - if 'text' in graph_data.keys(): - title.DrawPaveLabel(0.2,0.88,0.8,0.92, graph_data['text'], "brNDC") - #text_box = TPaveText(0.2,0.51,0.8,0.54); text_box.AddText(graph_data['text']); - #text_box.SetFillColor(42); text_box.SetTextAlign(12); text_box.SetTextfONt(40); text_box.Draw(); - - return [] # gl #list of TGraph - - c = TCanvas("mpr", "AthenaMP-mp-scaling-charts", 10, 10, 800, 1024) - c.SetFillColor(17); c.SetBorderSize(1); c.cd() - - tfile = TFile(root_file, "READ"); print (" root compression 
factor = ", tfile.GetCompressionFactor()) - mpSumTree = tfile.Get("mp_summary") - cpSumTree = tfile.Get("cp_summary") - ioTree = tfile.Get("io") - cpuTree = tfile.Get("cpu") - numaTree = tfile.Get("numa") - - if ne is 0: - ne = int(root_file.split('.')[-2].replace('ne', '')) - print ("extracted ne=[%i]" % ne) - -##### FORMING THE DATA FOR ROOT Graphing-Charting-Histogramming ##### - np_list = list(set(getTreeList(cpSumTree, 'np', ''))); np_list.sort() #uniqeify and sort np_list - elap_time_stdev = list() - elap_time_avg = list() - elap_time_max = list() - cpu_time_stdev = list() - cpu_time_max = list() - cpu_time_avg = list() - - elap_time_rate = list() - cpu_time_rate = list() - user_time_rate = list() - par_event_rate = list() - - elap_cpu_time = list() # elap - cpu time avg. - - elap_time_stdev_x = list() - cpu_time_stdev_x = list() - par_elap_time = list() # elap time as seen from mother - - mp_lb = [b.GetName() for b in list(mpSumTree.GetListOfBranches())] - cp_lb = [b.GetName() for b in list(cpSumTree.GetListOfBranches())] - - mp_txt = "%s" % mp_lb + "\n" - cp_txt = "%s" % cp_lb + "\n" - - mp_pt = TPaveText(0.1,0.1,0.9,0.9) - mp_pt.SetFillColor(22) - - mp_latex = TLatex() - mp_latex.SetTextAlign(12) - - txt_dict=dict() - ltxt_dict = dict() - for s in mp_lb: - txt_dict[s] = "%20s" % s - ltxt_dict[s] = list() - np_txt = "" - for np in np_list: - size = cpSumTree.Draw('elap_time:cpu_time:elap_time-cpu_time:user_time', "np==%i" % int(np), 'goff'); - elapv = cpSumTree.GetV1(); - cpuv = cpSumTree.GetV2(); - elap_cpuv = cpSumTree.GetV3(); - userv = cpSumTree.GetV4(); - - elap_time_stdev.append(float(TMath.RMS(size, elapv ))) - elap_time_avg.append(float(TMath.Mean(size, elapv ))) - elap_time_max.append(float(TMath.MaxElement(size, elapv ))) - - cpu_time_stdev.append(float(TMath.RMS(size, cpuv ))) - cpu_time_avg.append(float(TMath.Mean(size, cpuv ))) - cpu_time_max.append(float(TMath.MaxElement(size, cpuv ))) - - elap_cpu_time.append(float(TMath.Mean(size, elap_cpuv))) - - #elap_time_rate.append( float(60*ne)/float(np*elap_time_avg[-1]) ) - #cpu_time_rate.append( float(60*ne)/float(np*cpu_time_avg[-1]) ) - #user_time_rate.append( float(60*ne)/float( np * float(TMath.Mean(size, userv))) ) - - elap_time_rate.append( float(60*ne)/float(elap_time_avg[-1]) ) - cpu_time_rate.append( float(60*ne)/float(cpu_time_avg[-1]) ) - user_time_rate.append( float(60*ne)/float(float(TMath.Mean(size, userv))) ) - - #elap_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV2()))) - #cpu_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV4()))) - - msize = mpSumTree.Draw('m_par_time', "np==%i" % int(np), 'goff') - parv = mpSumTree.GetV1(); - par_elap_time.append(float(TMath.Mean(msize, parv))) - par_event_rate.append(float(60.0*ne)/par_elap_time[-1]) - - np_txt += "%10s" % np - for s in mp_lb: - txt_dict[s] += "%10.1f" % getTreeList(mpSumTree, s, "np==%i" % int(np) )[0] - ltxt_dict[s].append( "%10.1f" % getTreeList(mpSumTree, s, "np==%i" % int(np))[0] ) - - print ("np_list=%s\n etime_stdev=%s \n cpu_time_stdev=%s" % (np_list, elap_time_stdev, cpu_time_stdev)) - print ("elap-cpu=%s" % (elap_cpu_time)) - - #mn = 7; mt=10000 - from socket import gethostname - import platform -######################### - graph_list = list() -########################### - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "MP Times, Memory, IO, CPU PLOTS for ", - " %s " % root_file, - " machine: %s" % gethostname(), - #"%s, %s, %s, %s, %s, %s" % platform.uname(), - "%s" % platform.platform(), - "%s" % 
os.getenv('CMTCONFIG'), - " comments: %s" % comments, - "np=%s " % [int(s) for s in np_list] - ] - graph['title'] = 'ATHENA MP MONITOR mpMon REPORT' - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'latex' - graph['data'] = ltxt_dict - graph['title']= "mp_summary numbers:" - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'parallel_elap': (np_list, par_elap_time), - 'worker_elap_avg': (np_list, elap_time_avg), - 'worker_elap_max': (np_list, elap_time_max), - 'worker_cpu_avg': (np_list, cpu_time_avg), - 'worker_cpu_max': (np_list, cpu_time_max), - #'elap_time_stdev_x': (np_list, elap_time_stdev_x), - #'cpu_time_stdev_x': (np_list, cpu_time_stdev_x) - } - graph['title'] = "Parallel and Workers Elap and CPU Time Variations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time, sec" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - "m_elap_time" : (mpSumTree, "m_elap_time:np"), - "m_firstevent_time": (mpSumTree, "m_firstevent_time:np"), - "m_par_time" : (mpSumTree, "m_par_time:np"), - "m_merging_time": (mpSumTree, "m_merging_time:np") - } - graph['title'] = "PARENT PROCESS TIMES" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Time, sec" - graph_list.append(graph) -############################## - """ - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "This plot intentially left blank" - ] - graph['title'] = 'BLANK CHART' - graph_list.append(graph) - """ -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'elap_time_stdev': (np_list, elap_time_stdev), - 'cpu_time_stdev': (np_list, cpu_time_stdev), - 'elap-cpu_avg': (np_list, elap_cpu_time), - } - graph['title'] = "Worker Processes Time Deviations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time_StDev, sec" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - #"event_rate": (mpSumTree, "event_rate:np"), - #"event_rate_x": (mpSumTree, "(60*%i)/(x_par_time):np" % ne), - #"event_rate" : (mpSumTree, "(60*%i)/(m_par_time):np" % ne), - #"per_proc_rate": (mpSumTree, "event_proc_rate:np") - "event_rate" : (mpSumTree, "(60*%i*np)/(m_par_time):np" % ne), - "per_proc_rate": (mpSumTree, "event_proc_rate*np:np") - #"per_proc_rate_x": (mpSumTree, "(60*%i)/(x_par_time*np):np" % ne) - } - graph['title'] = "ATHENA MP EVENT PROCESSING RATE" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Evts/min, Evts/proc/min" - graph_list.append(graph) -############################ - - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - 'parallel_rate': (mpSumTree, "event_rate:np"), #"(event_rate + 207/(30*(np-2)+m_par_time)):np"), - 'worker_rate': (cpSumTree, "(60*%i)/(elap_time):np" % ne), - #'cpu_rate': (cpSumTree, "(60*%i)/(cpu_time):np" % ne), - #'user_rate': (cpSumTree, "(60*%i)/(user_time):np" % ne) - } - graph['title'] = "Workers and parallel event processing rates, wall-clock time" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph['goptions'] = "ALP" - graph_list.append(graph) -############################ - - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'par_event_rate': (np_list, par_event_rate), - 'elap_avg_rate': (np_list, elap_time_rate), - 'cpu_avg_rate': (np_list, cpu_time_rate), - 'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Worker event processing 
rates Wall-Time, User-CPU, Total-CPU time rates, averaged." - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################# - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - "total_mem": (mpSumTree, "free_mem_spike/1024:np"), - "mem_per_proc": (mpSumTree, "free_mem_spike/np/1024:np"), - } - graph['title'] = "PHYSICAL MEMORY CONSUMPTION BY AthenaMP" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Kb" - graph_list.append(graph) - - cond = "vmem!=0" -############################# - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - "mem_per_proc": (mpSumTree, "free_mem_spike/np/1024:np"), - "vmem": (cpSumTree, ("vmem:np", cond) ), - "rss": (cpSumTree, ("rss:np", cond) ) - } - graph['title'] = "VMEM, RSS, RealMemory(from free-spike) per Worker" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - -############################ - cond = "np>0" -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "bread/sec": (ioTree, ('breadps:Time', cond) ), - "bwrite/sec": (ioTree, ('bwrtnps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MP %s " % cond - graph['xtitle']= "Time" - graph['ytitle']= "Total Amount of Data R/W in blocks per sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "write_reqs/sec": (ioTree, ('wtps:Time', cond) ), - "read_reqs/sec": (ioTree, ('rtps:Time', cond) ), - "total_reqs/sec": (ioTree, ('tps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MP %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Transfer_requests/sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - '%_user' : (cpuTree, ('puser:Time', cond) ), - '%_system': (cpuTree, ('psystem:Time', cond) ), - '%_idle': (cpuTree, ('pidle:Time', cond) ), - '%_io_wait': (cpuTree, ('piowait:Time', cond) ) - } - graph['title'] = "CPU Activity for Athena MP %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - '%_io_wait': (cpuTree, ('piowait:Time', cond) ), - } - graph['title'] = "CPU Activity for Athena MP %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -######################### - -######################### - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - 'numa_hit': (numaTree, ('numa_hit:Time', cond) ), - 'numa_miss':(numaTree,('numa_miss:Time', cond) ), - 'numa_foreign': (numaTree, ('numa_foreign:Time', cond) ), - 'interleave_hit': (numaTree, ('interleave_hit:Time', cond) ), - 'local_node': (numaTree, ('local_node:Time', cond) ), - 'other_node': (numaTree, ('other_node:Time', cond) ), - } - graph['title'] = "NUMA Activity for athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Nbr of hits/misses" - graph['text']= "np = %s" % np_list - graph_list.append(graph) - - - - pads = list() - 
- cpp = 1 #charts per pad - ppc = 2 #pads per canvas - - c.cd() - gStyle.SetOptStat(0); - gStyle.SetPalette(1); - gStyle.SetCanvasColor(33); - gStyle.SetFrameFillColor(10); - gStyle.SetMarkerStyle(21) - gStyle.SetMarkerColor(2) - gStyle.SetMarkerSize(0.4) - print ("gStyle.Set done") - - title = TPaveLabel(0.1,0.98,0.9,1, "Athena MP Plots"); - title.SetFillColor(42); title.SetTextFont(40); - #title.Draw();print ("title Drawn") - - mgs = list() #List of TMultiGraphs - ls = list() #List of TLegends - gs = list() #List of TGraph - - for j in range(ppc): - y_factor = 0.99; x1 = 0.01; x2 = 0.99; y1 = y_factor - (y_factor-0.01)*(j+1)/float(ppc); y2 = y_factor - (y_factor-0.01)*j/float(ppc) - print ("x1,y1,x2,y2", x1, y1, x2, y2 ) - pad = TPad("pad%i" % j, "pad%i" % j, x1, y1, x2, y2, 33); pad.Draw() - pads.append(pad); - - num_cans = len(graph_list) /(cpp*ppc) if len(graph_list) % (cpp*ppc)==0 else len(graph_list)/(cpp*ppc) + 1 - graph_list += [None,]* (num_cans*cpp*ppc - len(graph_list)) - print ("number of pages/canvases in report = ", num_cans) - - pdf_file = root_file - for s in ['merged.', '.py', '.root']: - pdf_file = pdf_file.replace(s, '') - pdf_file ="%s.pdf" % pdf_file - - for i in range(num_cans): - for j in range(ppc): - graph = graph_list[ppc*i+j] - if graph is None: - continue - - if 'logY' in graph.keys(): - if graph['logY']: - pads[j].SetLogy() - else: - pads[j].SetLogy(0) - - pads[j].cd() - pads[j].SetRightMargin(0.2) - l = TLegend(0.82,0.20,0.99,0.89); ls.append(l) - mg = TMultiGraph(); mgs.append(mg) - print ("graph=", graph) - gs.append(MakeMultiGraph(graph, mg, l)) - - c.Update() - if i == 0: - print ("pdf.start") - c.Print(pdf_file+'(', 'pdf') #start page - elif i < num_cans-1: - print ("pdf.body") - c.Print(pdf_file, 'pdf') #body pages - else: - print ("pdf.end") - c.Print(pdf_file + ')', 'pdf') #end page - c.SaveAs("%s.%i.png" % (pdf_file, i)) - for pad in pads: - pad.Clear() - - -def report(root_file, ne = 0, comments=""): - print(' mpMonTools.report(): root_file=', root_file) - from ROOT import TFile, TTree, TBranch, gPad, TCanvas, TPad,TProfile, TGraph, TLegend, TLegendEntry, TMultiGraph, gStyle, TLatex, TPaveLabel, TPaveText, TH2I, TMath - - def getTreeList(tree, column, condition): - size = tree.Draw(column, condition,'goff'); - v1 = tree.GetV1(); v1.SetSize(size) - return list(v1) - - def makeGraph(tree, name, formula, condition="", color = 1, lineWidth=1): - tree.Draw(formula, condition, "goff") - graph = TGraph(int(tree.GetSelectedRows()), tree.GetV2(), tree.GetV1()) - graph.SetLineColor(color); - graph.SetLineWidth(lineWidth) - graph.SetName(name); - return graph - - def MakeMultiGraph(c, pad, graph_data, mg, l, tpl): - clr = 1 - gl = list() - hl = list() - l.SetFillColor(0) - lel = list() - - draw_option="" - if graph_data is None: - return [] - if graph_data['type'] is 'profile': - for name, (tree, param) in graph_data['data'].items(): - clr+=1; formula =''; condition='' - if type(param).__name__=='tuple': - formula = param[0] - condition = param[1] - else: - print ("MakeMG: ", formula, condition) - formula = param - condition = "" - - print ("PROFILE: name=%s, tree=%s, formula=%s, condition=%s" % (name, tree.GetName(), formula, condition) ) - - hprof = TProfile( - "%s" % name, - " %s;%s;%s" % (name, graph_data['xtitle'], graph_data['ytitle']), - 100,0,18, " " - ) - tpl.append(hprof) - tree.Draw("%s>>%s"% (formula, name), condition, "prof") - tpl[-1].SetMarkerColor(clr) - - if draw_option == "": - tpl[-1].Draw() - draw_option="PSAME" - else: - print ("PROFILE: 
%s 'PSAME' clr=%i " % (name, clr)) - tpl[-1].Draw("PSAME") - le = l.AddEntry(tpl[-1], name) - le.SetFillColor(0) - lel.append(le) - l.Draw() - #gPad.Update() - #c.Update() - return [] - - if graph_data['type'] is 'graph': - for name, (tree, param) in graph_data['data'].items(): - clr+=1; formula =''; condition='' - if type(param).__name__=='tuple': - formula = param[0] - condition = param[1] - else: - print ("MakeMG: ", formula, condition) - formula = param - condition = "" - - if tree is None: - print ("name=%s -> TTree DOESN't EXIST" % name) - continue - - print ("name=%s, tree=%s, formula=%s, condition=%s" % (name, tree.GetName(), formula, condition), end='') - tree.Draw(formula, condition, "goff") - - selection_size = tree.GetSelectedRows() - if selection_size==-1: - print ("-> SKIPPED (DO NOT EXIST): SELECTION_SIZE=%i" % selection_size ) - continue - else: - print ("-> SELECTION_SIZE=%i" % selection_size ) - pass - - g = TGraph(selection_size, tree.GetV2(), tree.GetV1()); gl.append(g) - - g.SetName(name); g.SetLineColor(clr); g.SetMarkerColor(clr); g.SetLineWidth(0) - if name == "worker_rate": - g.SetLineColor(10) - - mg.Add(g); - le = l.AddEntry(g, name) - le.SetFillColor(0) - lel.append(le) - - if graph_data['type'] is 'list': - for name, (lx,ly) in graph_data['data'].items(): - print ("name=%s" % name); print (lx); print (ly) - clr+=1 - g = TGraph( len(lx), array.array('f', lx), array.array('f', ly) ) - g.SetName(name); g.SetLineColor(clr); g.SetLineWidth(1); g.SetMarkerColor(clr); - mg.Add(g) - l.AddEntry(g, name).SetFillColor(0) - gl.append(g) - - if graph_data['type'] is 'array': - clr = 1 - g_list = list() - data = graph_data['data'] - for name,(x,y) in graph_data['data'].items(): - print (x); print (y) - clr+=1; - g = TGraph(len(x), x, y) - g.SetName(name); g.SetLineColor(clr); g.SetLineWidth(1); g.SetMarkerColor(clr) - gl.append(g) - mg.Add(g); - l.AddEntry(g, name).SetFillColor(0) - if graph_data['type'] is 'text': - title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC") - for s in graph_data['data']: - print ("graph_data['data']=%s" % s) - mp_pt.AddText(s) - mp_pt.SetTextAlign(12); - mp_pt.SetTextSize(0.04) - mp_pt.Draw() - return [] - - if graph_data['type'] is 'latex': - title.DrawPaveLabel(0.1,0.93,0.9,0.99, graph_data['title'], "brNDC") - tl = TLatex(); tl.SetTextSize(0.02); tl.SetTextAlign(12); - txtd = graph_data['data'] - i = 0; x0 = 0.05; y0 = 0.90; dx = 0.08; dy = 0.05 - x1 = x0 - tl.DrawLatex(x1, y0, 'np') - for s in txtd['np']: - x1 = x1 + dx - tl.DrawLatex(x1, y0, s) - txtd.pop('np') - - for k in txtd.keys(): - y0 = y0 - dy - tl.DrawLatex(x0-0.03, y0, k); - x1 = x0 - for s in txtd[k]: - x1 = x1 + dx - tl.DrawLatex(x1, y0, s) - return [] - - if mg.GetListOfGraphs() is None: - print ("MultiGraph: Empty",) - print ("mg=%s" % mg.GetName()) - return[] - - if 'goptions' in graph_data.keys(): - mg.Draw(graph_data['goptions']) - else: - mg.Draw('ALP') - - l.Draw() - h=mg.GetHistogram(); h.SetXTitle(graph_data['xtitle']); h.SetYTitle(graph_data['ytitle']); h.SetMinimum(0.1); - title.DrawPaveLabel(0.1,0.91,0.9,0.99, graph_data['title'], "brNDC") - if 'text' in graph_data.keys(): - title.DrawPaveLabel(0.2,0.88,0.8,0.92, graph_data['text'], "brNDC") - #text_box = TPaveText(0.2,0.51,0.8,0.54); text_box.AddText(graph_data['text']); - #text_box.SetFillColor(0); text_box.SetTextAlign(12); text_box.SetTextfONt(40); text_box.Draw(); - - return [] # gl #list of TGraph - - c = TCanvas("mp_can", "AthenaMP-mp-scaling-charts", 1, 1, 800, 1024) - c.SetFillColor(0); 
c.SetBorderSize(1); c.cd() - - tfile = TFile(root_file, "READ"); print (" root compression factor = ", tfile.GetCompressionFactor()) - mpSumTree = tfile.Get("mp_summary") - cpSumTree = tfile.Get("cp_summary") - ioTree = tfile.Get("io") - cpuTree = tfile.Get("cpu") - memTree = tfile.Get("mem") - numaTree = tfile.Get("numa") - print ("numaTree=%s" % numaTree) - - if ne is 0: - ne = int(root_file.split('.')[-2].replace('ne', '')) - print ("extracted ne=[%i]" % ne) - -##### FORMING THE DATA FOR ROOT Graphing-Charting-Histogramming ##### - np_list = list(set(getTreeList(cpSumTree, 'np', ''))); np_list.sort() #uniqeify and sort np_list - elap_time_stdev = list() - elap_time_avg = list() - elap_time_max = list() - cpu_time_stdev = list() - cpu_time_max = list() - cpu_time_avg = list() - - elap_time_rate = list() - cpu_time_rate = list() - user_time_rate = list() - par_event_rate = list() - - elap_cpu_time = list() # elap - cpu time avg. - - elap_time_stdev_x = list() - cpu_time_stdev_x = list() - par_elap_time = list() # elap time as seen from mother - - mp_lb = [b.GetName() for b in list(mpSumTree.GetListOfBranches())] - cp_lb = [b.GetName() for b in list(cpSumTree.GetListOfBranches())] - - mp_txt = "%s" % mp_lb + "\n" - cp_txt = "%s" % cp_lb + "\n" - - mp_pt = TPaveText(0.1,0.1,0.9,0.9) - mp_pt.SetFillColor(0) #22 - - mp_latex = TLatex() - mp_latex.SetTextAlign(12) - - txt_dict=dict() - ltxt_dict = dict() - for s in mp_lb: - txt_dict[s] = "%20s" % s - ltxt_dict[s] = list() - ltxt_dict["total_rate"]=list() - - np_txt = "" - for np in np_list: - size = cpSumTree.Draw('elap_time:cpu_time:elap_time-cpu_time:user_time', "np==%i" % int(np), 'goff'); - elapv = cpSumTree.GetV1(); - cpuv = cpSumTree.GetV2(); - elap_cpuv = cpSumTree.GetV3(); - userv = cpSumTree.GetV4(); - - elap_time_stdev.append(float(TMath.RMS(size, elapv ))) - elap_time_avg.append(float(TMath.Mean(size, elapv ))) - elap_time_max.append(float(TMath.MaxElement(size, elapv ))) - - cpu_time_stdev.append(float(TMath.RMS(size, cpuv ))) - cpu_time_avg.append(float(TMath.Mean(size, cpuv ))) - cpu_time_max.append(float(TMath.MaxElement(size, cpuv ))) - - elap_cpu_time.append(float(TMath.Mean(size, elap_cpuv))) - - #elap_time_rate.append( float(60*ne)/float(np*elap_time_avg[-1]) ) - #cpu_time_rate.append( float(60*ne)/float(np*cpu_time_avg[-1]) ) - #user_time_rate.append( float(60*ne)/float( np * float(TMath.Mean(size, userv))) ) - - elap_time_rate.append( float(60*ne)/float(elap_time_avg[-1]) ) - cpu_time_rate.append( float(60*ne*np)/float(cpu_time_avg[-1]) ) - user_time_rate.append( float(60*ne)/float(float(TMath.Mean(size, userv))) ) - - #elap_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV2()))) - #cpu_time_stdev_x.append(float(TMath.RMS(size, cpSumTree.GetV4()))) - - msize = mpSumTree.Draw('m_par_time', "np==%i" % int(np), 'goff') - parv = mpSumTree.GetV1(); - par_elap_time.append(float(TMath.Mean(msize, parv))) - par_event_rate.append(float(60.0*ne)/par_elap_time[-1]) - - np_txt += "%10s" % np - for s in mp_lb: - gtl = getTreeList(mpSumTree, s, "np==%i" % int(np) ) - print ("%s: getTreeList: %s" % (s,gtl), end='') - gtl_avg = meanList(gtl) - print (" avg=%10.1f" % gtl_avg) - txt_dict[s] += "%10.1f" % gtl_avg - ltxt_dict[s].append( "%10.1f" % gtl_avg) - ltxt_dict["total_rate"].append("%10.1f" % - ( 60.0*float(np)*float(ne)/ float(ltxt_dict["m_par_time"][-1]) ) - ) - - print ("np_list=%s\n etime_stdev=%s \n cpu_time_stdev=%s" % (np_list, elap_time_stdev, cpu_time_stdev)) - print ("elap-cpu=%s" % (elap_cpu_time)) - - #mn = 7; 
mt=10000 - from socket import gethostname - import platform -######################### - graph_list = list() - -########################### - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "MP Times, Memory, IO, CPU PLOTS for ", - " %s " % root_file, - " machine: %s" % gethostname(), - #"%s, %s, %s, %s, %s, %s" % platform.uname(), - "%s" % platform.platform(), - "%s" % os.getenv('CMTCONFIG'), - " comments: %s" % comments, - "np=%s " % [int(s) for s in np_list] - ] - graph['title'] = 'ATHENA MP MONITOR mpMon REPORT' - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'latex' - graph['data'] = ltxt_dict - graph['title']= "mp_summary numbers:" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - "event_rate" : (mpSumTree, "(60*%i*np)/(m_par_time):np" % ne), - #"per_proc_rate": (mpSumTree, "event_proc_rate*np:np") - } - graph['title'] = "ATHENA MP EVENT PROCESSING RATE" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Evts/min " - graph_list.append(graph) - -############################ - - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - 'parallel_rate': (mpSumTree, "event_rate:np"), - 'worker_rate': (cpSumTree, "(60*%i)/(elap_time):np" % ne), - } - graph['title'] = "Workers and parallel event processing rates, wall-clock time" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph['goptions'] = "ALP" - graph_list.append(graph) -############################ - cond="np>0" - graph = dict() - graph['type'] = 'graph' - graph['logY'] = False - graph['data'] ={ - 'free_mem': (memTree, ('kbmemfree/1024:Time', cond) ), - 'used_mem': (memTree, ('kbmemused/1024:Time', cond) ), - 'cached_mem': (memTree, ('kbcached/1024:Time', cond) ), - 'buffers_mem': (memTree, ('kbbuffers/1024:Time', cond) ), - 'kbswpused': (memTree, ('kbswapused/1024:Time', cond) ) - #'commit_mem': (memTree, ('kbcommit/1024:Time', cond) ) - } - graph['title'] = "Memory Activity for Athena MP %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Memory, Mb " - #graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'parallel_elap': (np_list, par_elap_time), - 'worker_elap_avg': (np_list, elap_time_avg), - 'worker_elap_max': (np_list, elap_time_max), - 'worker_cpu_avg': (np_list, cpu_time_avg), - 'worker_cpu_max': (np_list, cpu_time_max), - #'elap_time_stdev_x': (np_list, elap_time_stdev_x), - #'cpu_time_stdev_x': (np_list, cpu_time_stdev_x) - } - graph['title'] = "Parallel and Workers Elap and CPU Time Variations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time, sec" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - "m_elap_time" : (mpSumTree, "m_elap_time:np"), - "m_firstevent_time": (mpSumTree, "m_firstevent_time:np"), - "m_par_time" : (mpSumTree, "m_par_time:np"), - "m_merging_time": (mpSumTree, "m_merging_time:np") - } - graph['title'] = "PARENT PROCESS TIMES" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Time, sec" - graph_list.append(graph) -############################## - """ - graph =dict() - graph['type'] = 'text' - graph['data'] = [ - "This plot intentially left blank" - ] - graph['title'] = 'BLANK CHART' - graph_list.append(graph) - """ -############################ - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'elap_time_stdev': 
(np_list, elap_time_stdev), - 'cpu_time_stdev': (np_list, cpu_time_stdev), - 'elap-cpu_avg': (np_list, elap_cpu_time), - } - graph['title'] = "Worker Processes Time Deviations" - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Time_StDev, sec" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] = { - #"event_rate": (mpSumTree, "event_rate:np"), - #"event_rate_x": (mpSumTree, "(60*%i)/(x_par_time):np" % ne), - #"event_rate" : (mpSumTree, "(60*%i)/(m_par_time):np" % ne), - #"per_proc_rate": (mpSumTree, "event_proc_rate:np") - "event_rate" : (mpSumTree, "(60*%i*np)/(m_par_time):np" % ne), - "per_proc_rate": (mpSumTree, "event_proc_rate*np:np") - #"per_proc_rate_x": (mpSumTree, "(60*%i)/(x_par_time*np):np" % ne) - } - graph['title'] = "ATHENA MP EVENT PROCESSING RATE" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Evts/min, Evts/proc/min" - graph_list.append(graph) -############################ - - graph = dict() - graph['type'] = 'list' - graph['data'] = { - 'par_event_rate': (np_list, par_event_rate), - 'elap_avg_rate': (np_list, elap_time_rate), - 'cpu_avg_rate': (np_list, cpu_time_rate), - 'user_avg_rate': (np_list, user_time_rate) - } - graph['title'] = "Worker event processing rates Wall-Time, User-CPU, Total-CPU time rates, averaged." - graph['xtitle'] = "NbrProc" - graph['ytitle'] = "Evts/Proc/Min" - graph_list.append(graph) - -############################# - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - "total_mem": (mpSumTree, "free_mem_spike/1024:np"), - "mem_per_proc": (mpSumTree, "free_mem_spike/np/1024:np"), - } - graph['title'] = "PHYSICAL MEMORY CONSUMPTION BY AthenaMP" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Kb" - graph_list.append(graph) - - cond = "vmem!=0" -############################# - graph = dict() - graph['type'] = 'profile' - graph['data'] ={ - "VMem": (cpSumTree, "vmem:np" ), - "Memory_per_proc": (mpSumTree, "free_mem_spike/np/1024:np"), - #"VMem1": (cpSumTree, ("vmem:np", cond) ), - #"VMem2": (cpSumTree, ("vmem:np", cond) ), - #"VMem3": (cpSumTree, ("vmem:np", cond) ) - } - graph['title'] = "VMem, RealMemory per Worker" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - "mem_per_proc": (mpSumTree, "free_mem_spike/np/1024:np"), - "vmem": (cpSumTree, ("vmem:np", cond) ), - "rss": (cpSumTree, ("rss:np", cond) ) - } - graph['title'] = "VMEM, RSS, RealMemory(from free-spike) per Worker" - graph['xtitle']= "Nbr of Processes" - graph['ytitle']= "Memory Consumption, Mb" - graph_list.append(graph) - -############################ - cond = "np>0" -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "bread/sec": (ioTree, ('breadps:Time', cond) ), - "bwrite/sec": (ioTree, ('bwrtnps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MP %s " % cond - graph['xtitle']= "Time" - graph['ytitle']= "Total Amount of Data R/W in blocks per sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - "write_reqs/sec": (ioTree, ('wtps:Time', cond) ), - "read_reqs/sec": (ioTree, ('rtps:Time', cond) ), - "total_reqs/sec": (ioTree, ('tps:Time', cond) ) - } - graph['title'] = "IO Activity for Athena MP %s" % cond - 
graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Transfer_requests/sec" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - '%_user' : (cpuTree, ('puser:Time', cond) ), - '%_system': (cpuTree, ('psystem:Time', cond) ), - '%_idle': (cpuTree, ('pidle:Time', cond) ), - '%_io_wait': (cpuTree, ('piowait:Time', cond) ) - } - graph['title'] = "CPU Activity for Athena MP %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -############################ - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - '%_io_wait': (cpuTree, ('piowait:Time', cond) ), - } - graph['title'] = "CPU Activity for Athena MP %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Percentage of CPU Utilization" - graph['text']= "np = %s" % np_list - graph_list.append(graph) -######################### - -######################### - graph = dict() - graph['type'] = 'graph' - graph['logY'] = True - graph['data'] ={ - 'numa_hit': (numaTree, ('numa_hit:Time', cond) ), - 'numa_miss':(numaTree,('numa_miss:Time', cond) ), - 'numa_foreign': (numaTree, ('numa_foreign:Time', cond) ), - 'interleave_hit': (numaTree, ('interleave_hit:Time', cond) ), - 'local_node': (numaTree, ('local_node:Time', cond) ), - 'other_node': (numaTree, ('other_node:Time', cond) ), - } - graph['title'] = "NUMA Activity for athena MJ (Multi Jobs) %s" % cond - graph['xtitle']= "Time, sec since 00:00" - graph['ytitle']= "Nbr of hits/misses" - graph['text']= "np = %s" % np_list - graph_list.append(graph) - -############################ - cond="np==2" - graph = dict() - graph['type'] = 'graph' - graph['data'] ={ - 'free_cached_buffers': (memTree, ('(kbmemfree+kbcached+kbbuffers)/1048576:(Time-60476)', cond) ), - } - graph['title'] = "Memory Consumption for Athena MP %s" % cond - graph['xtitle']= "Time, Sec" - graph['ytitle']= "Memory, Gb " - graph_list.append(graph) -############################ - - pads = list() - - cpp = 1 #charts per pad - ppc = 1 #pads per canvas - - c.cd() - gStyle.SetOptStat(0); - gStyle.SetPalette(1); - gStyle.SetCanvasColor(33); - gStyle.SetFrameFillColor(0); - gStyle.SetMarkerStyle(21) - gStyle.SetMarkerColor(2) - gStyle.SetMarkerSize(0.3) - print ("gStyle.Set done") - - title = TPaveLabel(0.1,0.98,0.9,1, "Athena MP Plots"); - title.SetFillColor(0); title.SetTextFont(40); - #title.Draw();print ("title Drawn") - - mgs = list() #List of TMultiGraphs - ls = list() #List of TLegends - gs = list() #List of TGraph - - for j in range(ppc): - y_factor = 0.99; x1 = 0.01; x2 = 0.99; y1 = y_factor - (y_factor-0.01)*(j+1)/float(ppc); y2 = y_factor - (y_factor-0.01)*j/float(ppc) - print ("x1,y1,x2,y2", x1, y1, x2, y2 ) - pad = TPad("pad%i" % j, "pad%i" % j, x1, y1, x2, y2, 10); pad.Draw() - pads.append(pad); - - num_cans = len(graph_list) /(cpp*ppc) if len(graph_list) % (cpp*ppc)==0 else len(graph_list)/(cpp*ppc) + 1 - graph_list += [None,]* (num_cans*cpp*ppc - len(graph_list)) - print ("number of pages/canvases in report = ", num_cans) - - pdf_file = root_file - for s in ['merged.', '.py', '.root']: - pdf_file = pdf_file.replace(s, '') - pdf_file ="%s.pdf" % pdf_file - tpl = list() - for i in range(num_cans): - for j in range(ppc): - graph = graph_list[ppc*i+j] - if graph is None: - continue - - if 'logY' in graph.keys(): - if graph['logY']: - pads[j].SetLogy() - 
else: - pads[j].SetLogy(0) - - pads[j].cd() - pads[j].SetRightMargin(0.2) - l = TLegend(0.82,0.20,0.99,0.89); ls.append(l) - #tpl = list()#TProfile list - mg = TMultiGraph(); mgs.append(mg) - print ("graph=", graph) - gs.append(MakeMultiGraph(c, pads[j],graph, mg, l, tpl)) - - c.Update() - if i == 0: - print ("pdf.start") - c.Print(pdf_file+'(', 'pdf') #start page - elif i < num_cans-1: - print ("pdf.body") - c.Print(pdf_file, 'pdf') #body pages - else: - print ("pdf.end") - c.Print(pdf_file + ')', 'pdf') #end page - #c.SaveAs("%s.%i.png" % (pdf_file, i)) - #c.SaveAs("%s.%i.C" % (pdf_file, i)) - for pad in pads: - pad.Clear() - - #c.Update() - #c.cd() - pads[0].cd() - pads[0].SetRightMargin(0.2) - draw_option="" - for tp in tpl: - print (" TProfile: %s" % tp.GetName()) - if draw_option=="": - tp.Draw() - draw_option="PSAME" - else: - tp.Draw("PSAME") - c.Update() - #c.Print("%s.pdf" % "coors.memory_fig") - #c.SaveAs("%s.png" % "coors.memory_fig") - #c.SaveAs("%s.C" % "coors.memory_fig") - - -########## THE END ################ - - diff --git a/Control/AthenaMP/share/AthenaMP_jobProperties.py b/Control/AthenaMP/share/AthenaMP_jobProperties.py deleted file mode 100755 index 31d32ee8291aee806db6d37f05cd00b22b83cc62..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/AthenaMP_jobProperties.py +++ /dev/null @@ -1,117 +0,0 @@ -# @file: Control/AthenaMP/share/AthenaMP_jobProperties -# @author: Mous Tatarkhanov <tmmous@cern.ch> -# $Id: AthenaMP_jobProperties.py, v 1.1 2010-12-02 tmmous Exp $ - -#============================================================== -# List of all available AthenaMP job properties explained -#============================================================== - -#-------------------IMPORTANT NOTE----------------------------- -# All mp jobs get default job property values -# NO NEED to include this file! -# -# USE this file and change settings in your job options -# ONLY when you want to change/play with properties! -#-------------------------------------------------------------- - -#============================================================== -# HOW TO RUN ATHENAMP: -# 1. Job option runs: -# CLI '--nprocs=$n' to be used in command line run of athena.py -# 2. Job transform runs: -# CLI '--athenaopts="--nprocs=$n"' to be used in command -#============================================================== - -#-------------------------------------------------------------- -# AthenaMP properties -#-------------------------------------------------------------- -from AthenaMP.AthenaMPFlags import jobproperties as jps - -#--------------------TmpDir----------------------------------- -#TmpDir property controls where the workers create tmp-dir for output - -jps.AthenaMPFlags.TmpDir= "" #default -> "/tmp/$USER/athena-mp-tmp-$USER/$PID" -#jps.AthenaMPFlags.TmpDir = "$HOME" #set to your home folder -#-EXAMPLE- -#for testing different IO setting for MP runs (workers output): -#jps.AthenaMPFlags.TmpDir = "$HOME/scratch0" # on lxplus network drive -#jps.AthenaMPFlags.TmpDir = "/dev/shmm" # standard in memory drive -#-------------------------------------------------------------- - - -#-------------------AffinityCPUList----------------------------- -# AffinityCPUlist option controls the affinity of workers to CPU-cores on -# multicore machines. 
default value = [] -> workers float (not pinned)

-jps.AthenaMPFlags.AffinityCPUList=[] #default -> workers are floating (not pinned)
-#jps.AthenaMPFlags.AffinityCPUList=[0,1,2,3,4,5,6,7,8]
-
-#-NOTES-
-#if you have only 4 workers they are pinned
-#to the following CPU-cores: 0,1,2,3
-#if you want to change the order to 8,7,6,5
-#the CPUList should be [8,7,6,5,...]
-
-##-EXAMPLE-
-#If you are forking only two workers and you want to pin them
-#to the first and last CPU-cores available on your machine do:
-#import multiprocessing
-#jps.AthenaMPFlags.AffinityCPUList=[0, multiprocessing.cpu_count()-1]
-#--------------------------------------------------------------
-
-
-#----------------- EventsBeforeFork----------------------------
-# EventsBeforeFork option controls when the Workers are forked from the mother process:
-# value n=0 -> after init, n>0 -> after the n'th event. default value - 0.
-
-jps.AthenaMPFlags.EventsBeforeFork=0 #default
-#jps.AthenaMPFlags.EventsBeforeFork=1 #fork workers after processing 1 evt in mother
-
-#-NOTES-
-#Forking after processing events in the mother has the advantage of
-#sharing more memory among workers and parent by the COW mechanism (>0.5Gb vs. 0.2Gb).
-#But it triggers a vetoing algorithm for the first events processed in the mother
-#on each output stream, to avoid writing the pool_file in the mother.
-#This is done for merging-compliance reasons of the pool files created by the workers.
-# /vetoing algorithm ref: Charles Leggett/
-#--------------------------------------------------------------
-
-
-#--------------------doFastMerge-----------------------------
-#doFastMerge is the switch between pool_fast_merge and pool_slow_merge
-
-jps.AthenaMPFlags.doFastMerge = False #default -> use the more reliable slow merge
-#jps.AthenaMPFlags.doFastMerge = True
-
-#-NOTES-
-#slow merge creates a separate Athena merging job /ref: Sebastien Binet/
-#fast merge should be an order of magnitude faster than slow merge
-#fast merge uses the PoolMerge tool /ref: Peter Van Gemmeren/
-#--------------------------------------------------------------
-
-
-#--------------------doRoundRobin-----------------------------
-#doRoundRobin is the switch between the RoundRobin and Queue mechanisms
-#of feeding evts to the Workers
-
-jps.AthenaMPFlags.doRoundRobin = False #default -> use MP.Queue
-#--------------------------------------------------------------
-
-
-#--------------------useSingleReader-----------------------------
-#useSingleReader is the switch that selects a single reader to feed events
-#into the shared memory queue
-
-jps.AthenaMPFlags.useSingleReader = False #default -> workers read from file
-#jps.AthenaMPFlags.useSingleReader = True #use a single reader process
-
-#-COMMENTS-
-#the default MP.Queue is the more efficient way of distributing evts to Workers,
-#but when EvtMax=-1 (undefined nbr. of evts.)
-> RoundRobin scheme is used -#-------------------------------------------------------------- - - -#-------------------------------------------------------------- -#--------- more options will be created as needed or requested- -#-------------------------------------------------------------- - -#============================================================== -#---------------------- THE END ------------------------------- -#============================================================== diff --git a/Control/AthenaMP/share/bs2esd.py b/Control/AthenaMP/share/bs2esd.py deleted file mode 100644 index 8d4c593ae5cc60eaad439adc285284a7be6086e3..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/bs2esd.py +++ /dev/null @@ -1,27 +0,0 @@ -# steering file for BS->ESD step - - -from RecExConfig.RecFlags import rec -rec.doTrigger .set_Value_and_Lock( False ) -rec.doESD .set_Value_and_Lock( False ) -rec.doAOD .set_Value_and_Lock( False ) -rec.doWriteAOD .set_Value_and_Lock( False ) -rec.doWriteTAG .set_Value_and_Lock( False ) -rec.doCBNT .set_Value_and_Lock( False ) -rec.doTruth .set_Value_and_Lock( False ) -rec.doPerfMon .set_Value_and_Lock( False ) - -rec.readRDO .set_Value_and_Lock( True ) -rec.doWriteESD .set_Value_and_Lock( True ) - -BSRDOInput=["/afs/cern.ch/atlas/project/rig/data/data10_7TeV.00152166.physics_MinBias.merge.RAW._lb0206._0001.1"] - -from AthenaCommon.AthenaCommonFlags import athenaCommonFlags as acf -acf.FilesInput = BSRDOInput -acf.EvtMax = 10 - - -# main jobOption -include ("RecExCommon/RecExCommon_topOptions.py") - - diff --git a/Control/AthenaMP/share/mp_rdotoesd.py b/Control/AthenaMP/share/mp_rdotoesd.py deleted file mode 100755 index f50dc0717051f5cad8adeccb0586ecfc8efa3287..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/mp_rdotoesd.py +++ /dev/null @@ -1,48 +0,0 @@ - -#============================================================== -# Job Options for fast-reco with AthenaMP -#============================================================== - -# assumptions: -# 1. RecExCommon_links.sh to be sourced in curdir -# 2. 
CLI option --nprocs to be used in command line run of athena.py - - -#---------------------------------------------------------------------- -# AthenaMP properties -#---------------------------------------------------------------------- -# expect "--nprocs" to be used in command line options of athena.py -from AthenaMP.AthenaMPFlags import jobproperties as jps -jps.AthenaMPFlags.EventsBeforeFork=0 - -import multiprocessing -cpu_list = range( multiprocessing.cpu_count() ) # [0,1,2,..,ncpus] for many-core machine -cpu_list.reverse() #reverse the cpu-proc pinning order -jps.AthenaMPFlags.AffinityCPUList=cpu_list - - -#---------------------------------------------------------------------- -# setting mp_rdotoesd reco -#---------------------------------------------------------------------- -# expect RecExCommon_links.sh to be sourced in curdir - - - -# main jobOption -include ("RecExCommon/rdotoesd.py") - - -#---------------------------------------------------------------------- -# FOR DEBUGGING PURPOSES -#---------------------------------------------------------------------- -#from AthenaCommon.AppMgr import theApp -#theApp.ReflexPluginDebugLevel = 10000 - -#from AthenaCommon.Logging import log as msg -#msg.info ( "svcMgr=%s" % svcMgr) -#msg.info ( "appMgr=%s" % theApp) - - -#---------------------------------------------------------------------- -# user modifier should come here -#---------------------------------------------------------------------- diff --git a/Control/AthenaMP/share/mp_rdotoesd_id.py b/Control/AthenaMP/share/mp_rdotoesd_id.py deleted file mode 100755 index 41446d9630c61eb4fa073fce185f3779e5850ead..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/mp_rdotoesd_id.py +++ /dev/null @@ -1,48 +0,0 @@ - -#============================================================== -# Job Options for fast-reco with AthenaMP -#============================================================== - -# assumptions: -# 1. RecExCommon_links.sh to be sourced in curdir -# 2. 
CLI option --nprocs to be used in command line run of athena.py - - -#---------------------------------------------------------------------- -# AthenaMP properties -#---------------------------------------------------------------------- -# expect "--nprocs" to be used in command line options of athena.py -from AthenaMP.AthenaMPFlags import jobproperties as jps -jps.AthenaMPFlags.EventsBeforeFork=0 - -import multiprocessing -cpu_list = range( multiprocessing.cpu_count() ) # [0,1,2,..,ncpus] for many-core machine -cpu_list.reverse() #reverse the cpu-proc pinning order -jps.AthenaMPFlags.AffinityCPUList=cpu_list - - -#---------------------------------------------------------------------- -# setting mp_rdotoesd reco -#---------------------------------------------------------------------- -# expect RecExCommon_links.sh to be sourced in curdir - - - -# main jobOption -include ("RecExRecoTest/RecExRecoTest_RTT_id.py") - - -#---------------------------------------------------------------------- -# FOR DEBUGGING PURPOSES -#---------------------------------------------------------------------- -#from AthenaCommon.AppMgr import theApp -#theApp.ReflexPluginDebugLevel = 10000 - -#from AthenaCommon.Logging import log as msg -#msg.info ( "svcMgr=%s" % svcMgr) -#msg.info ( "appMgr=%s" % theApp) - - -#---------------------------------------------------------------------- -# user modifier should come here -#---------------------------------------------------------------------- diff --git a/Control/AthenaMP/share/mp_reco_fast.py b/Control/AthenaMP/share/mp_reco_fast.py deleted file mode 100755 index f10ff1b2a674c67356674fed181064c1139af296..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/mp_reco_fast.py +++ /dev/null @@ -1,75 +0,0 @@ - -#============================================================== -# Job Options for fast-reco with AthenaMP -#============================================================== - -# assumptions: -# 1. RecExCommon_links.sh to be sourced in curdir -# 2. CLI option --nprocs to be used in command line run of athena.py - - -#---------------------------------------------------------------------- -# AthenaMP properties -#---------------------------------------------------------------------- -# expect "--nprocs" to be used in command line options of athena.py -from AthenaMP.AthenaMPFlags import jobproperties as jps -jps.AthenaMPFlags.EventsBeforeFork=1 - -import multiprocessing -cpu_list = range( multiprocessing.cpu_count() ) # [0,1,2,..,ncpus] for many-core machine -cpu_list.reverse() #reverse the cpu-proc pinning order -jps.AthenaMPFlags.AffinityCPUList=cpu_list - - -#---------------------------------------------------------------------- -# Setting fast-reco w/o Calo, Muon or Trigger. 
-#---------------------------------------------------------------------- - -# expect RecExCommon_links.sh to be sourced in curdir - -from AthenaCommon.AthenaCommonFlags import athenaCommonFlags as acFlags -acFlags.EvtMax=10 -acFlags.PoolESDOutput="ESD.pool.root" - -from RecExConfig.RecFlags import rec -rec.doPerfMon=True - -rec.doCalo=False -rec.doMuon=False -#rec.doID=True -rec.doTrigger=False - -rec.doESD=True -rec.doAOD=False -rec.doHist=False -rec.doWriteESD=True -rec.doWriteAOD=False -rec.doWriteTAG=False - -# if needed to configure trigger -# see https://twiki.cern.ch/twiki/bin/view/Atlas/TriggerFlags -# include ( "TriggerJobOpts/TriggerFlags.py" ) - -# if needed to configure AOD building -# see https://twiki.cern.ch/twiki/bin/view/Atlas/UserAnalysisTest#The_AOD_Production_Flags -# from ParticleBuilderOptions.AODFlags import AODFlags - -# main jobOption -include ("RecExCommon/RecExCommon_topOptions.py") - - -#---------------------------------------------------------------------- -# FOR DEBUGGING PURPOSES -#---------------------------------------------------------------------- -#from AthenaCommon.AppMgr import theApp -#theApp.ReflexPluginDebugLevel = 10000 - -#from AthenaCommon.Logging import log as msg -#msg.info ( "svcMgr=%s" % svcMgr) -#msg.info ( "appMgr=%s" % theApp) - - - -#---------------------------------------------------------------------- -# user modifier should come here -#---------------------------------------------------------------------- diff --git a/Control/AthenaMP/share/tests/AMP_basictests.py b/Control/AthenaMP/share/tests/AMP_basictests.py deleted file mode 100755 index 24e27c5bd8d6ad336d3d33425f3c0aaa9235581b..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/AMP_basictests.py +++ /dev/null @@ -1,217 +0,0 @@ -import sys, os, unittest -sys.path.append( os.path.join( os.getcwd(), os.pardir ) ) - -from common import * - -__all__ = [ - 'Basic011ModuleTestCase', - 'Basic02ExecutionTestCase', - 'Basic03GroupTestCase', - 'Basic04SharedQueueTestCase', -] - -if '--build' in sys.argv: - res = os.system( "cd ../../cmt; make QUICK=1" ) - if res: - sys.exit( res ) - - -### basic module test cases ================================================== -class Basic01ModuleTestCase( MyTestCase ): - def test01API( self ): - """Test module loading and API existence""" - - import _athenamp - - self.assert_( hasattr( _athenamp, 'launch' ) ) - self.assert_( hasattr( _athenamp, 'ProcessGroup' ) ) - self.assert_( hasattr( _athenamp, 'Process' ) ) - self.assert_( hasattr( _athenamp, 'SharedQueue' ) ) - - def test02ArgumentsAndErrors( self ): - """Test basic faulty argument error handling""" - - import _athenamp - - self.assertRaises( TypeError, _athenamp.launch, 1 ) - - def test03Instantiations( self ): - """Test class instantiations""" - - import _athenamp - - proc = _athenamp.Process( -1 ) - self.assertEqual( proc.pid, -1 ) - - proc = _athenamp.Process( pid = -1 ) - self.assertEqual( proc.pid, -1 ) - - group = _athenamp.ProcessGroup() - group = _athenamp.ProcessGroup( 4 ) - group = _athenamp.ProcessGroup( nprocs = 4 ) - - queue = _athenamp.SharedQueue() - queue = _athenamp.SharedQueue( 100 ) - - -### basic execution test cases =============================================== -class Basic02ExecutionTestCase( MyTestCase ): - def _checkChildren( self ): - # the following tests that there are no children running - self.assertRaises( OSError, os.wait ) - - def setUp( self ): - self._checkChildren() - - def tearDown( self ): - self._checkChildren() - - def 
test01RunChild( self ): - """Test running and destruction of a child""" - - import _athenamp - - proc = _athenamp.launch() - self.assert_( 0 <= proc.pid ) - - if proc.pid == 0: - import signal - signal.pause() - else: - import time, signal - time.sleep(1) - sigtosend = signal.SIGKILL - os.kill( proc.pid, sigtosend ) - result = os.waitpid( proc.pid, 0 ) - self.assertEqual( result[0], proc.pid ) - self.assertEqual( result[1], sigtosend ) - - def test02RunChildren( self ): - """Test running and destruction of a group of children""" - - import _athenamp - - pids = [] - - leader = _athenamp.launch() - if leader.pid == 0: # make child wait - import signal - signal.pause() - else: - os.setpgid( leader.pid, 0 ) - pids.append( leader.pid ) - - for i in range( 2 ): - proc = _athenamp.launch(); - self.assert_( 0 <= proc.pid ) - - if proc.pid == 0: # make all children wait - import signal - signal.pause() - else: - assert leader.pid - os.setpgid( proc.pid, os.getpgid( leader.pid ) ) - pids.append( proc.pid ) - - import time, signal - time.sleep( 1 ) - sigtosend = signal.SIGKILL - pgid = os.getpgid( leader.pid ) - os.killpg( pgid, sigtosend ) - while pids: - result = os.waitpid( -pgid, 0) - self.assert_( result[0] in pids ) - self.assertEqual( result[1], sigtosend ) - pids.remove( result[0] ) - - -### basic group usage test cases ============================================= -class Basic03GroupTestCase( MyTestCase ): - def test01GroupLifetime( self ): - """Test creation and life time of a group""" - - import _athenamp - - group = _athenamp.ProcessGroup( 4 ) - - # nothing started yet, so waiting should simply return - self.assertEqual( group.wait(), () ) - self.assertEqual( group.wait( 0 ), () ) - self.assertEqual( group.wait( options = 0 ), () ) - - def test02RunMapAsync( self ): - """Test no-op running of map_async on a worker group""" - - import _athenamp - - group = _athenamp.ProcessGroup( 4 ) - group.map_async( "exit" ) - self.assertEqual( len(group._children()), 4 ) # now instantiated - - status = group.wait() - self.assertEqual( [ x[1] for x in status ], 4*[0,] ) - - def test03PythonTaskMapAsync( self ): - """Test running a python task via map_async on a worker group""" - - import _athenamp, __main__ - - def myfunc(): - # print 'called myfunc' - return 1 - __main__.myfunc = myfunc - - # existing function with return value - group = _athenamp.ProcessGroup( 4 ) - group.map_async( "myfunc" ) - status = group.wait() - - self.assertEqual( [ x[1] for x in status ], 4*[0,] ) - self.assertEqual( [ x[2] for x in status ], 4*[1,] ) - - # non-existing function, leading to failure - group = _athenamp.ProcessGroup( 4 ) - group.map_async( "no_such_func" ) - status = group.wait() - - self.assertEqual( [ x[1] for x in status ], 4*[0x0B,] ) - - -### basic group usage test cases ============================================= -class Basic04SharedQueueTestCase( MyTestCase ): - def test01SharedQueueSending( self ): - """Test put functionality of shared queue""" - - import _athenamp, random - - q = _athenamp.SharedQueue( 5 ) - - r = random.Random( 1 ) - largebuf = ''.join( [ str(r.random()) for i in range(4096) ] ) - self.assertRaises( OverflowError, q.put_nowait, largebuf ) # too large for buffer - - for i in range(5): - q.put_nowait( i ) - self.assertRaises( OverflowError, q.put_nowait, 5 ) # too many elements - - for i in range(5): - self.assertEqual( q.get_nowait(), i ) - self.assertRaises( EOFError, q.get_nowait ) - - sdata = [ "text", "text\0with\0null", "morenull\0\0" ] - for t in sdata: - q.put_nowait( t ) - 
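# (The put_nowait/get_nowait pair being exercised here round-trips strings,
#  including embedded NULs, through the fixed-size shared buffer: put_nowait
#  raises OverflowError when an element is too large or the queue is full,
#  and get_nowait raises EOFError once the queue has been drained, exactly
#  as asserted in the surrounding test.)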
self.assertEqual( q.get_nowait(), t ) - - -## actual test run -if __name__ == '__main__': - from MyTextTestRunner import MyTextTestRunner - - loader = unittest.TestLoader() - testSuite = loader.loadTestsFromModule( sys.modules[ __name__ ] ) - - runner = MyTextTestRunner( verbosity = 2 ) - result = not runner.run( testSuite ).wasSuccessful() - - sys.exit( result ) diff --git a/Control/AthenaMP/share/tests/MyTextTestRunner.py b/Control/AthenaMP/share/tests/MyTextTestRunner.py deleted file mode 100755 index be8438bd2b125548d4221441080d0166a8c48e69..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/MyTextTestRunner.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest - -if hasattr( unittest, 'TextTestResult' ): - class MyTextTestResult( unittest.TextTestResult ): - def getDescription(self, test): - return test.shortDescription() -else: - class MyTextTestResult( object ): - pass - - -class MyTextTestRunner( unittest.TextTestRunner ): - resultclass = MyTextTestResult - - def run( self, test ): - """Run the given test case or test suite.""" - - result = self._makeResult() - test( result ) - result.printErrors() - self.stream.writeln( result.separator2 ) - run = result.testsRun - self.stream.writeln() - - if not result.wasSuccessful(): - self.stream.write( "FAILED (" ) - failed, errored = map( len, ( result.failures, result.errors ) ) - if failed: - self.stream.write( "failures=%d" % failed ) - if errored: - if failed: self.stream.write( ", " ) - self.stream.write( "errors=%d" % errored ) - self.stream.writeln( ")" ) - else: - self.stream.writeln( "OK" ) - - return result diff --git a/Control/AthenaMP/share/tests/common.py b/Control/AthenaMP/share/tests/common.py deleted file mode 100755 index 4a7d1da005ba2d59f0fcb912bf23b375a5fa041d..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/common.py +++ /dev/null @@ -1,29 +0,0 @@ -# File: roottest/python/common.py -# Author: Wim Lavrijsen (LBNL, WLavrijsen@lbl.gov) -# Created: 09/24/10 -# Last: 09/30/10 - -__all__ = [ 'pylong', 'maxvalue', 'MyTestCase' ] - -import os, sys, unittest - -if sys.hexversion >= 0x3000000: - pylong = int - maxvalue = sys.maxsize - - class MyTestCase( unittest.TestCase ): - def shortDescription( self ): - desc = str(self) - doc_first_line = None - - if self._testMethodDoc: - doc_first_line = self._testMethodDoc.split("\n")[0].strip() - if doc_first_line: - desc = doc_first_line - return desc -else: - pylong = long - maxvalue = sys.maxint - - class MyTestCase( unittest.TestCase ): - pass diff --git a/Control/AthenaMP/share/tests/extract_mp_stat.py b/Control/AthenaMP/share/tests/extract_mp_stat.py deleted file mode 100755 index 8d77ccc29283e38a3e16c136ea9eaa00f7217528..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/extract_mp_stat.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -# @file: mpMon.py -# @purpose: This script extract the WORKER information from the parent process's log and then WORKER's logs in AthenaMP -# @author: Mous Tatarkhanov <tmmous@cern.ch> -# @date: April 15, 2009 -# @example: -# @code -# extract_mp_stat.py log.parent_process -# @endcode -# - -from __future__ import print_function - -__version__ = "$Revision: 285809 $" -__author__ = "Mous Tatarkhanov <tmmous@cern.ch>" - -from optparse import OptionParser -#import AthenaMP.mpMonTools as mpt - -import sys - -if __name__ == "__main__": - - parser = OptionParser(usage="usage: %prog [options] -f jobo") - p = parser.add_option - p( "-f", - "--file", - dest = "log_file", - 
default = None, - help ="athenaMP parent process stdout log" - ) - - (options, args) = parser.parse_args() - - import sys, os - if options.log_file == None: - str(parser.print_help() or "") - sys.exit(1) - - log_file = options.log_file - - print ("log_file = %s" % log_file) - if not os.path.exists(log_file): - print ("log_file doesn't exist. Please give valid parent process log file") - str(parser.print_help() or "") - sys.exit(1) - - - from AthenaMP import mpMonTools as mpt - mpt.writeOutWorkersStat(log_file) - print ("DONE...") diff --git a/Control/AthenaMP/share/tests/flush_cache.py b/Control/AthenaMP/share/tests/flush_cache.py deleted file mode 100755 index efeab70276392bef71eb70d6951fd87e6b854572..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/flush_cache.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import os, sys, time -import multiprocessing -from multiprocessing import Pool - -from future import standard_library -standard_library.install_aliases() -import subprocess - -def flush_cache(n): - l = list() - for i in range(n): - l.append("a" * (1 << 30)) - print ("%i gb of memory eaten" % n) - time.sleep(10) - return n - -if __name__ == '__main__': - out = subprocess.getoutput('free -m') - print (">free -m \n", out) - - total_mem_mb =int( out.splitlines()[1].split()[1]) - cached_mem_mb = int( out.splitlines()[1].split()[6]) - print ("CACHED [%i Mb] - before flush" % cached_mem_mb) - - if cached_mem_mb < 200: - print ("no need to flush the cache... bye!") - sys.exit(0) - - gb = 1 + (total_mem_mb >> 10) - - ncpus = multiprocessing.cpu_count() - print ("ncpus= [%i]" % ncpus) - print ("total available memory [%i Mb] [%i Gb]" % (total_mem_mb, gb)) - - nprocs = 2*ncpus - ngb = 1 + gb / nprocs - - - print ("Nbr of Procs to bite on memory [%i] " % nprocs) - print ("Nbr of Gb to flush per process [%i Gb]" % ngb) - - - pool = Pool(processes = nprocs) - result = pool.map(flush_cache, [ngb,]*nprocs) - print ("Total memory eaten: [%i Gb]" % sum(result)) - - out = subprocess.getoutput('free -m') - print (">free -m \n", out) - cached_mem_mb = int( out.splitlines()[1].split()[6]) - print ("CACHED [%i Mb] - after flush" % cached_mem_mb) - print ("Your machine's memory cache is flushed" ) - - time.sleep(5) diff --git a/Control/AthenaMP/share/tests/mjMon.py b/Control/AthenaMP/share/tests/mjMon.py deleted file mode 100755 index 28697e436aa873261770450111e5072aec3453d5..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/mjMon.py +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env python - -# @file: mjMon.py -# @purpose: This script monitors the mp performance of AthenaMP -# Rewriting the shell script - athenaMP_monitor.sh -# Inspired by monitoring tools from Sebastien: PerfMon; -# @author: Mous Tatarkhanov <tmmous@cern.ch> -# @date: December 2009 -# -# @example: -# @code -# monitor_athenaMP 2 rdotoesd.py 60 -# @endcode -# - -from __future__ import print_function - -__version__ = "$Revision: 276792 $" -__author__ = "Mous Tatarkhanov <tmmous@cern.ch>" - -from optparse import OptionParser -import AthenaMP.mjMonTools as mpt - -import sys - -class Writer: - def __init__(self, stdout, filename): - self.stdout = stdout - self.logfile = open(filename, 'a') - - def write(self, text): - self.stdout.write(text) - self.logfile.write(text) - - def close(self): - self.stdout.close() - self.logfile.close() - - def flush(self): - self.stdout.flush() - self.logfile.flush() - -if __name__ == "__main__": - - parser = 
OptionParser(usage="usage: %prog [options] -f jobo") - p = parser.add_option - p( "-j", - "--jobo", - dest = "jobo", - help = "The path to the job options file to run in parallel" ) - p( "-p", - "--np", - dest = "nbrProcs", - default = [1,2], - help = "Nbr of parallel processes to fork" ) - p( "-e", - "--ne", - dest = "nbrEvts", - default = "-1", - help = "Number of events to process, EvtMax" ) - p( "-o", - "--output", - dest = "outFileName", - default = 'mplog', - help = "Name of the output file which will contain the informations gathered for monitoring." ) - p( "-f", - "--flush_cache", - dest = "doFlushCache", - action = "store_true", - default = False, - help ="switch to activate flushing of the machine cache of the machine before launching athenaMP" ) - p( "-c", - "--comments", - dest = "commentsStr", - default = "", - help = "comments to add to the name and report" ) - p( "-d", - "--doPlots", #FIX - dest = "doPlots", - action="store_true", - default = False, - help = "switch to activate plotting of report charts at the end" - ) - p( "-n", - "--numa_set", #FIX - dest = "numaSet", - #action="store_true", - default = None, - help = "this option activates numa settings. Format: [(0,0),(1,1),...]" - ) - (options, args) = parser.parse_args() - - import sys - if options.jobo == None or options.nbrProcs ==None or options.nbrEvts == None: - str(parser.print_help() or "") - sys.exit(1) - - if len(args) > 0: - fileNames = [ arg for arg in args if arg[0] != "-" ] - pass - - #output redirection to file and display - writer = Writer(sys.stdout, options.outFileName ) - sys.stdout = writer - np_list = eval(options.nbrProcs) - if isinstance(np_list, int): - np_list = [np_list,] - - if options.numaSet is not None: - numa_list = eval(options.numaSet) - else: - numa_list = None - - if isinstance(numa_list, list): - print ("numa_list=%s" % numa_list) - elif numa_list is not None: - print ("Please input correct numa_list") - str(parser.print_help() or "") - sys.exit(1) - - ne = int(options.nbrEvts) - jobo = options.jobo - print ("np_list = ", np_list) - print ("ne = ", ne) - print ("jobo = ", jobo) - job = jobo.split()[0] - print ("mpMon.log =", options.outFileName) - print ("doFluchCache=", options.doFlushCache, type(options.doFlushCache)) - if options.doFlushCache: - options.commentsStr += ".doFlushCache" - print ("numa_list=%s" % numa_list) - - def cleanup(): - print (' Cleaning...Goodbye!') - for pid in mpt.pid_list: - mpt.stop_proc_tree(pid) - - def _print_mem(): - mpt.print_memstat("<np%i.ne%i>:" % (np, ne)) - - import atexit - atexit.register(cleanup) - - TIME_STEP = mpt.TIME_STEP - - import os - import subprocess - import signal - import time - - - for np in np_list: - writer.flush() - - mpt.show_numactl() - - suffix = "mj.%s.%i.%i" % (jobo.split()[0], np, ne) - sar_log = "sar.%s" % suffix - if os.path.exists(sar_log): - os.remove(sar_log) - - if options.doFlushCache: - print (subprocess.call(['flush_cache.py',])) - - #time.sleep(TIME_STEP) - - mpt.init_mp_stat() - _mp_stat = mpt.mp_stat - #_print_mem() - - sar_proc = mpt.launch_sar(sar_log, TIME_STEP) #launching sar for io,mem,cpu monitoring - - #time.sleep(3*TIME_STEP) - - _print_mem(); - t0=t1=t2=t3=0 - t0=time.time() - - #LAUNCH: - proc_list = list() - proc_dict = dict() - #cpubind = [0,0,0,0, 1,1,1,1] - #membind= [0,0,0,0, 1,1,1,1] - - if numa_list is not None: - if len(numa_list) < np: - print ("len(numa_list) < np: need to append [('f','f'),]") - numa_list += [('f','f'),] * (np - len(numa_list)) - else: - print 
("len(numa_list)==len(range(np)): there are enough numa settings defined") - - iterator = zip(range(np), numa_list) - else: - iterator = zip(range(np), range(np)) - - print ("numa_list=%s" % numa_list) - - se = 0 # skip events - numa_set = None - numa_str = "" - for item in iterator: - i = item[0] - if numa_list is not None: - numa_set = item[1] - numa_str = "numactl --membind=%i --cpubind=%i " % (numa_set[0],numa_set[1]) - - (proc, proc_out, proc_err) = mpt.launch_athena(jobo, ne, se, np, "ne%i.ni%i" % (ne,i), numa_set ); #launching athena - proc_list.append(proc) - proc_dict[proc.pid] = (proc_out, proc_err) - #print ("%s athena %i.%i.%i proc launched ...[pid %i] out:%s err:%s" % (numa_str, ne, np, i, proc.pid, proc_out, proc_err )) - se +=ne - time.sleep(TIME_STEP); - - #EXECUTION: - sc = list()# status code - ppid=os.getpid() - while mpt.launched_processes_working(ppid): - _print_mem() - time.sleep(TIME_STEP) - for proc in proc_list: - rc = proc.poll() # return code if subprocess finished - if sc is not None: - sc.append(rc) - - t1=time.time() - - print ("athena processes finished:") - - #SUMMARY - mpt.summarize_proc_stat() - for i in range(2): - _print_mem() - time.sleep(TIME_STEP) - - print ("FINISHED MONITORING:") - mpt.stop_proc(sar_proc) - - print ("COLLECTING STATISTICS...") - mpt.get_full_sar_stat(sar_log) - print ("FINISHED COLLECTING STATISTICS") - - print ("START ANALYSIS...") - - print (" ELAPSED TIMES: \n Time: dt1=[%i sec]" % (t1-t0)) - - - _mp_stat['sp_summary']=mpt.SPSummary(np) - _mp_summary = _mp_stat['sp_summary'] - _mp_summary.extract_summary("dir") - - #_mp_summary['x_init_time']= [t1-t0, ] #externally observed time - #_mp_summary['x_par_time'] = [t2-t1, ] #externally observed time - #_mp_summary['x_fin_time'] = [t3-t2, ] #externally observed time - #_mp_summary['x_elap_time']= [t1-t0, ] - #_mp_summary['event_rate']= [ float(ne)*60.0/float(_mp_summary['m_par_time'][0]), ] - #_mp_summary['event_rate_x'] = [ float(ne)*60.0/float(t1-t0), ] - #_mp_summary['event_proc_rate']= [ _mp_summary['event_rate'][0] / float(np), ] - #_mp_summary['event_proc_rate_x'] = [_mp_summary['event_rate_x'][0] / float(np), ] - - - mpt.print_summary() - - print ("FINISHED ANALYSIS") - - print ("START REPORT...") - mpt.prepare_mp_stat() # preparing mp_stat dictionary for ROOT - import pickle - pickle.dump(_mp_stat, open("pickle.%s.f" % suffix, "wb")) - - mpt.writeRootFile("%s.root" % suffix, np) - print ("FINISHED REPORT.") - - cleanup() - - import platform - from socket import gethostname - host_name = gethostname().split('.')[0] - merged_root_file = "%s.mj.%s.ne%i.%s.root" % (host_name, job, ne, options.commentsStr) - mpt.mergeRootOutput(merged_root_file, job, np_list, ne) - - if options.doPlots: - mpt.report(merged_root_file, ne, comments = options.commentsStr) - - cleanup() - print ("The End") - sys.exit(0) - diff --git a/Control/AthenaMP/share/tests/mpMon.py b/Control/AthenaMP/share/tests/mpMon.py deleted file mode 100755 index 92400de6aaf920c6a2645634857e51ae44a65b75..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/mpMon.py +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env python - -# @file: mpMon.py -# @purpose: This script monitors the mp performance of AthenaMP -# Rewriting the shell script - athenaMP_monitor.sh -# Inspired by monitoring tools from Sebastien: PerfMon; -# @author: Mous Tatarkhanov <tmmous@cern.ch> -# @date: December 2009 -# -# @example: -# @code -# monitor_athenaMP 2 rdotoesd.py 60 -# @endcode -# - -from __future__ import print_function - - 
-""" -PROCESS STATE CODES -Here are the different values that the s, stat and state output specifiers (header "STAT" or "S") will -display to describe the state of a process. -D Uninterruptible sleep (usually IO) -R Running or runnable (on run queue) -S Interruptible sleep (waiting for an event to complete) -T Stopped, either by a job control signal or because it is being traced. -W paging (not valid since the 2.6.xx kernel) -X dead (should never be seen) -Z Defunct ("zombie") process, terminated but not reaped by its parent. -""" - -__version__ = "$Revision: 285809 $" -__author__ = "Mous Tatarkhanov <tmmous@cern.ch>" - -from optparse import OptionParser -import AthenaMP.mpMonTools as mpt - -import sys - -class Writer: - def __init__(self, stdout, filename): - self.stdout = stdout - self.logfile = open(filename, 'a') - - def write(self, text): - self.stdout.write(text) - self.logfile.write(text) - - def close(self): - self.stdout.close() - self.logfile.close() - - def flush(self): - self.stdout.flush() - self.logfile.flush() - -if __name__ == "__main__": - - parser = OptionParser(usage="usage: %prog [options] -f jobo") - p = parser.add_option - p( "-j", - "--jobo", - dest = "jobo", - help = "The path to the job options file to run in parallel" ) - p( "-p", - "--np", - dest = "nbrProcs", - default = [1,2], - help = "Nbr of parallel processes to fork" ) - p( "-e", - "--ne", - dest = "nbrEvts", - default = "-1", - help = "Number of events to process, EvtMax" ) - p( "-o", - "--output", - dest = "outFileName", - default = 'mplog', - help = "Name of the output file which will contain the informations gathered for monitoring." ) - p( "-f", - "--flush_cache", - dest = "doFlushCache", - action = "store_true", - default = False, - help ="switch to activate flushing of the machine cache of the machine before launching athenaMP" ) - p( "-c", - "--comments", - dest = "commentsStr", - default = "", - help = "comments to add to the name and report" ) - p( "-d", - "--doPlots", #FIX - dest = "doPlots", - action="store_true", - default = False, - help = "switch to activate plotting of report charts at the end" - ) - - (options, args) = parser.parse_args() - - import sys - if options.jobo == None or options.nbrProcs ==None or options.nbrEvts == None: - str(parser.print_help() or "") - sys.exit(1) - - if len(args) > 0: - fileNames = [ arg for arg in args if arg[0] != "-" ] - pass - - #output redirection to file and display - writer = Writer(sys.stdout, options.outFileName ) - sys.stdout = writer - np_list = eval(options.nbrProcs) - if isinstance(np_list, int): - np_list = [np_list,] - - ne = int(options.nbrEvts) - jobo = options.jobo - print ("np_list = ", np_list) - print ("ne = ", ne) - print ("jobo = ", jobo) - print ("mpMon.log =", options.outFileName) - print ("doFluchCache=", options.doFlushCache, type(options.doFlushCache)) - if options.doFlushCache: - options.commentsStr += ".doFlushCache" - - - def cleanup(): - print (' Cleaning...Goodbye!') - for pid in mpt.pid_list: - mpt.stop_proc_tree(pid) - - import atexit - atexit.register(cleanup) - - TIME_STEP = mpt.TIME_STEP - - import os - import subprocess - import signal - import time - - for np in np_list: - writer.flush() - - suffix = "mp.%s.%i.%i" % (jobo, np, ne) - sar_log = "sar.%s" % suffix - if os.path.exists(sar_log): - os.remove(sar_log) - - def _print_mem(): - mpt.print_memstat("<np%i.ne%i>:" % (np, ne)) - - if options.doFlushCache: - print (subprocess.call(['flush_cache.py',])) - time.sleep(TIME_STEP) - - mpt.init_mp_stat() - _mp_stat = 
mpt.mp_stat - #_print_mem() - - sar_proc = mpt.launch_sar(sar_log, TIME_STEP) #launching sar for io,mem,cpu monitoring - - time.sleep(3*TIME_STEP) - - _print_mem(); - t0=t1=t2=t3=0 - t0=time.time() - mproc = mpt.launch_athenaMP(jobo, np, ne); #launching athena-MP - mpid = mproc.pid #mother process pid - print ("parent launched ...[ %i]" % mpid ) - - mp_log = os.path.join("mp.output", "stdout.%s" % suffix) - #print ("mpid_log = ", mp_log) - - _mproc = mpt.ProcDict(mpid, child=False) - - time.sleep(TIME_STEP); - - #SERIAL: Mother Init Stage - while not mpt.children_born(mp_log, mpid,np) and _mproc.proc_ps_stat(): - if np==0: break - time.sleep(TIME_STEP) - t1=time.time() - - - #PARALLEL Stage - while mpt.children_working(mpid) and _mproc.proc_ps_stat(): - if np==0: break - _print_mem() - time.sleep(TIME_STEP) - t2 = time.time() - - _print_mem() - - print ("children processes finished:") - - #SERIAL: Mother-Finalize stage - while mproc.poll() is None: - _mproc.proc_ps_stat() - _print_mem() - time.sleep(TIME_STEP) - t3 = time.time() - - mpt.summarize_proc_stat() - - #print ("EXIT, thus have to terminate all created processes:") - try: - mproc.wait(); print ("mproc joined-finished") - except Exception as e: - print ("## while waiting mother process caught exception [%s] !!" % str(e.__class__), "## What:",e,) - print (sys.exc_info()[0], sys.exc_info()[1]) - sc = 1 - pass - - for i in range(3): - _print_mem() - time.sleep(TIME_STEP) - - print ("FINISHED MONITORING:") - mpt.stop_proc(sar_proc) - - print ("COLLECTING STATISTICS...") - mpt.get_full_sar_stat(sar_log) - print ("FINISHED COLLECTING STATISTICS") - - print ("START ANALYSIS...") - - cp_dir = mpt.grepPath(mp_log, "workdir", sep=':') - #print ("worker master cpid_dir = ", cp_dir) - print (" ELAPSED TIMES: \n MotherInit: dt1=[%i sec] \n Parallel dt2=[%i sec] \n MotherFinalize dt3=[%i sec]" % (t1-t0, t2-t1, t3-t2)) - - _mp_stat['cp_summary']=mpt.CPSummary(np) - _mp_stat['mp_summary']=mpt.MPSummary(np) - _mp_summary = _mp_stat['mp_summary'] - _cp_summary = _mp_stat['cp_summary'] - - _mp_summary.extract_summary(mp_log) - _cp_summary.extract_summary(cp_dir) - _mp_summary['x_init_time']= [t1-t0, ] #externally observed time - _mp_summary['x_par_time'] = [t2-t1, ] #externally observed time - _mp_summary['x_fin_time'] = [t3-t2, ] #externally observed time - - _mp_summary['event_rate']= [ float(ne)*60.0/float(_mp_summary['m_par_time'][0]), ] - _mp_summary['event_rate_x'] = [ float(ne)*60.0/float(t2-t1), ] - _mp_summary['event_proc_rate']= [ _mp_summary['event_rate'][0] / float(np), ] - _mp_summary['event_proc_rate_x'] = [_mp_summary['event_rate_x'][0] / float(np), ] - - - mpt.print_summary() - - print ("FINISHED ANALYSIS") - - print ("START REPORT...") - mpt.prepare_mp_stat() # preparing mp_stat dictionary for ROOT - import pickle - pickle.dump(_mp_stat, open("pickle.%s.f" % suffix, "wb")) - - mpt.writeRootFile("%s.root" % suffix, np) - print ("FINISHED REPORT.") - - cleanup() - - import platform - from socket import gethostname - host_name = gethostname().split('.')[0] - merged_root_file = "%s.mp.%s.ne%i.%s.root" % (host_name, jobo, ne, options.commentsStr) - mpt.mergeRootOutput(merged_root_file, jobo, np_list, ne) - - if options.doPlots: - mpt.report(merged_root_file, ne, comments = options.commentsStr) - - cleanup() - print ("The End") - sys.exit(0) - diff --git a/Control/AthenaMP/share/tests/mp_basic_test.py b/Control/AthenaMP/share/tests/mp_basic_test.py deleted file mode 100644 index 
91ac6f92eff939379e4e9dde81377924dc26c62f..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/mp_basic_test.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python - -# @file mp_basic_test.py -# @purpose: simple file to create a few elephantino events with athena-mp - -from __future__ import print_function - -input_file_name = 'my.data.pool' -output_file_name= 'reaccessed.my.data.pool' - -import PyUtils.AthFile as af -af.server.flush_cache() - -import os -import AthenaCommon.ChapPy as accp -app = accp.AthenaApp() -app << """ -EVTMAX=1000 -OUTPUT='%(input_file_name)s' -""" % globals() -app.include('AthExThinning/AthExThinning_makeData.py') - -print ("=== create an elephantino file...") -rc = app.run(stdout=os.devnull) -if rc: - raise RuntimeError(rc) -print ("=== create an elephantino file... [ok]") - - -app = accp.AthenaApp(cmdlineargs=['--nprocs=-1']) -app << """ -EVTMAX=1000 #-1 -INPUT=['%(input_file_name)s'] -OUTPUT='%(output_file_name)s' -""" % globals() - -app.include('AthExThinning/ReadNonThinnedData_jobOptions.py') - -mp_logfile = open('mp.elephantino.readback.logfile.txt', 'w+') -print ("=== read the elephantino file back (with athena-mp)... (logfile=%s)" % (mp_logfile.name,)) -rc = app.run(stdout=mp_logfile) -if rc: - raise RuntimeError(rc) -print ("=== read the elephantino file back (with athena-mp)... [ok]") - -input_file = af.fopen(input_file_name).infos -output_file = af.fopen(output_file_name).infos - -print (":"*80) -print ("::: results:") - -print ("""\ -input_file: [%s] - nentries: %s""" % ( - input_file['file_name'], - input_file['nentries'])) - -print ("""\ -output_file: [%s] - nentries: %s""" % ( - output_file['file_name'], - output_file['nentries'])) - -print ("::: bye.") -print (":"*80) diff --git a/Control/AthenaMP/share/tests/mp_genevt_test.py b/Control/AthenaMP/share/tests/mp_genevt_test.py deleted file mode 100644 index 05a7e125c3ccd5ea2825a178f30f6590bcdc9ccc..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/mp_genevt_test.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python - -# @file mp_genevt_test.py -# @purpose: simple file to create a few ttbar events and read them back -# with athena-mp - -from __future__ import print_function - -input_file_name = 'mc.event.pool' -output_file_name= 'reaccessed.mc.event.pool' - -import PyUtils.AthFile as af -af.server.flush_cache() - -import os -import AthenaCommon.ChapPy as accp -app = accp.AthenaApp() -app << """ -EVTMAX=1000 -OUTPUT='%(input_file_name)s' -""" % globals() -app.include('McParticleTests/iotest_WriteGenEvent_jobOptions.py') - -evt_logfile = open('mp.evgen.logfile.txt', 'w+') -print ("=== create an EVGEN file...") -rc = app.run(stdout=evt_logfile) -if rc: - raise RuntimeError(rc) -print ("=== create an EVGEN file... [ok]") - - -app = accp.AthenaApp(cmdlineargs=['--nprocs=-1']) -app << """ -EVTMAX=1000 -INPUT=['%(input_file_name)s'] -OUTPUT='%(output_file_name)s' -""" % globals() - -app.include('McParticleTests/iotest_ReadGenEvent_jobOptions.py') - -mp_logfile = open('mp.readback.logfile.txt', 'w+') -print ("=== read the EVGEN file back (with athena-mp)... (logfile=%s)" % (mp_logfile.name,)) -rc = app.run(stdout=mp_logfile) -if rc: - raise RuntimeError(rc) -print ("=== read the EVGEN file back (with athena-mp)... 
[ok]") - -print (":"*80) -print ("::: results:") -input_file = af.fopen(input_file_name).infos -print ("input_file: [%s]\n nentries: %s" % (input_file['file_name'], - input_file['nentries'],)) - -output_file = af.fopen('reaccessed.mc.event.pool').infos -print ("output_file: [%s]\n nentries: %s" % (output_file['file_name'], - output_file['nentries'],)) -print ("::: bye.") -print (":"*80) diff --git a/Control/AthenaMP/share/tests/smem_mon.py b/Control/AthenaMP/share/tests/smem_mon.py deleted file mode 100755 index 79d95923b325eb383bd3c35fbc07ad9d3b769771..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/smem_mon.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python - -# @author: Mous Tatarkhanov <tmmous@cern.ch> -# @date: August 23, 2010 -# @example: -# @code -# @endcode -# - -from __future__ import print_function - - -__version__ = "$Revision: 000001 $" -__author__ = "Mous Tatarkhanov <tmmous@cern.ch>" - -from optparse import OptionParser - -import sys, os -import time, operator - -from future import standard_library -standard_library.install_aliases() -import subprocess - -smem_exe = "/afs/cern.ch/user/t/tmmous/smem-0.9/smem" -smem_log = "smem_log" -smem_ppid = None -smem_time_step = 0.5 - -### helpers ------------------------------------------------------------------- -def smem(ppid = None, message = None): - - if ppid is None: - ppid = smem_ppid - - if message is not None: - cmd = "echo %s >> %s" % (message, smem_log) - out = subprocess.getoutput(cmd) - - cmd = "%s -P athena.py -s pid >> %s" % (smem_exe, smem_log) - out += subprocess.getoutput(cmd) - - print ("smem: %s" % out) - - if ps_line_nbr(ppid) > 0: - return True - else: - return False - -def ps_line_nbr(ppid): - cmd = "ps --ppid %s -o pid,state,vsize,rss,sz,start,cputime,etime " % ppid - (sc, out) = subprocess.getstatusoutput(cmd) - - if (sc != 0): - print ("%s\n" % cmd) - print (" PS> ERRROR... 
sc=%i" % sc) - print (" out=%s" % out ) - return 0 - - print (">PS sc=%i" % sc) - print ("%s" % out) - - - ln = len(out.splitlines()) - 1 - print ("line_nbr=", ln) - return ln - -def get_cpu(pid): - cmd = "ps --pid %i -o psr" % pid - #print (">%s" % cmd) - out = subprocess.getoutput(cmd) - cpu = int(out.splitlines()[1].split()[0]) - #print ("pid: [%i] has cpu: [%i]" % (pid, cpu)) - return cpu - -def set_proc_affinity(pid, cpu): - cmd = "taskset -pc %i %i" % (cpu, pid) - #print ("> taskset -pc %i %i" % (cpu, pid) ) - st,out = subprocess.getstatusoutput(cmd) - return st - -time_list = list() - -def watch( message=None): - time_list.append(time.time()) - - if message is not None: - return "[%i] %s " % (dt(), message) - else: - return len(time_list) - -def dt(n=-1): - return time_list[n] - time_list[n-1] - -if __name__ == "__main__": - - parser = OptionParser(usage="usage: %prog [options] -f jobo") - p = parser.add_option - p( "-l", - "--log", - dest = "log_file", - default = None, - help ="smem log file " - ) - - p( "-e", - "--exe", - dest="exe_file", - default = "/afs/cern.ch/user/t/tmmous/smem-0.9/smem", - help="location of smem executable" - ) - p( "-p", - "--ppid", - dest="ppid", - default = None, - help = "parent process pid" - ) - p ( "-t", - "--time_step", - dest = "time_step", - default = 0.5, - help = "smem measurement time step" - ) - (options, args) = parser.parse_args() - - if options.log_file == None: - str(parser.print_help() or "") - sys.exit(1) - - if options.ppid == None: - str("invalid ppid given") - str(parser.print_help() or "") - sys.exit(1) - - if (options.exe_file == None) or not os.path.exists(options.exe_file): - str("invalid exe_file, please provide exe file location") - str(parser.print_help() or "") - sys.exit(1) - - smem_log = options.log_file - smem_exe = options.exe_file - smem_ppid = options.ppid - smem_time_step = float(options.time_step) - - print ("smem log_file = [%s]" % smem_log) - print ("smem exe_file = [%s]" % smem_exe) - print ("smem ppid = [%s]" % smem_ppid) - print ("smem time_step = [%.1f]" % smem_time_step) - - if os.path.exists(smem_log): - print (" given smem_log name %s exists.. renaming it to old.%s" % (smem_log, smem_log)) - os.rename(smem_log, "OLD.%s" % smem_log) - - t0 = time.time() - - while( smem( message = "time=%.2f" % (time.time()-t0)) ): - time.sleep(smem_time_step); - pass - - print ("DONE...") diff --git a/Control/AthenaMP/share/tests/test_VetoFirstEvent.py b/Control/AthenaMP/share/tests/test_VetoFirstEvent.py deleted file mode 100755 index a14d30516fc242434e99e6b3039c0831f356f05e..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/share/tests/test_VetoFirstEvent.py +++ /dev/null @@ -1,89 +0,0 @@ - -#============================================================== -# Job Options for fast-reco with AthenaMP -#============================================================== - -# assumptions: -# 1. RecExCommon_links.sh to be sourced in curdir -# 2. 
CLI option --nprocs to be used in command line run of athena.py - - -#---------------------------------------------------------------------- -# AthenaMP properties -#---------------------------------------------------------------------- -# expect "--nprocs" to be used in command line options of athena.py -from AthenaMP.AthenaMPFlags import jobproperties as jps -jps.AthenaMPFlags.EventsBeforeFork=3 - -import multiprocessing -cpu_list = range( multiprocessing.cpu_count() ) # [0,1,2,..,ncpus] for many-core machine -cpu_list.reverse() #reverse the cpu-proc pinning order -jps.AthenaMPFlags.AffinityCPUList=cpu_list - - - -from AthenaCommon.AlgSequence import AlgSequence -job = AlgSequence() -# schedule our analysis algorithm -# from file(.py) import classname -from AthenaMP.VetoFirstEvent import VetoFirstEvent -job += VetoFirstEvent(name='EventOutputVeto') -job.EventOutputVeto.OutputLevel = INFO -job.EventOutputVeto.EventsBeforeFork=jps.AthenaMPFlags.EventsBeforeFork - -#---------------------------------------------------------------------- -# Setting fast-reco w/o Calo, Muon or Trigger. -#---------------------------------------------------------------------- - -# expect RecExCommon_links.sh to be sourced in curdir - -from AthenaCommon.AthenaCommonFlags import athenaCommonFlags as acFlags -acFlags.EvtMax=10 -acFlags.PoolESDOutput="ESD.pool.root" - -from RecExConfig.RecFlags import rec -rec.doPerfMon=True - -rec.doCalo=False -rec.doMuon=False -#rec.doID=True -rec.doTrigger=False - -rec.doESD=True -rec.doAOD=False -rec.doHist=False -rec.doWriteESD=True -rec.doWriteAOD=False -rec.doWriteTAG=False - -# if needed to configure trigger -# see https://twiki.cern.ch/twiki/bin/view/Atlas/TriggerFlags -# include ( "TriggerJobOpts/TriggerFlags.py" ) - -# if needed to configure AOD building -# see https://twiki.cern.ch/twiki/bin/view/Atlas/UserAnalysisTest#The_AOD_Production_Flags -# from ParticleBuilderOptions.AODFlags import AODFlags - -# main jobOption -include ("RecExCommon/RecExCommon_topOptions.py") - - -#---------------------------------------------------------------------- -# FOR DEBUGGING PURPOSES -#---------------------------------------------------------------------- -#from AthenaCommon.AppMgr import theApp -#theApp.ReflexPluginDebugLevel = 10000 - -#from AthenaCommon.Logging import log as msg -#msg.info ( "svcMgr=%s" % svcMgr) -#msg.info ( "appMgr=%s" % theApp) - - - -#---------------------------------------------------------------------- -# user modifier should come here -#---------------------------------------------------------------------- - -StreamESD.VetoAlgs += ["EventOutputVeto"] -#StreamAOD.VetoAlgs += ["EventOutputVeto"] -AANTupleStream.ExistDataHeader = False diff --git a/Control/AthenaMP/test/AthenaMP.xml b/Control/AthenaMP/test/AthenaMP.xml deleted file mode 100644 index 3a8eb5f2d8d7966c88bb5cc155ab3a767757cf09..0000000000000000000000000000000000000000 --- a/Control/AthenaMP/test/AthenaMP.xml +++ /dev/null @@ -1,27 +0,0 @@ -<?xml version="1.0"?> -<atn> - <TEST name="athenamp.basic" type="script" suite="athenamp"> - <package_atn>Control/AthenaMP</package_atn> - <!-- <options_atn>chappy.py AthenaMP/tests/mp_basic_test.py</options_atn> --> - <options_atn>python -c 'print "OK"'</options_atn> - <timelimit>30</timelimit> - <author> Sebastien Binet </author> - <mailto> binet@cern.ch </mailto> - <expectations> - <returnValue>0</returnValue> - </expectations> - </TEST> - - <TEST name="athenamp.genevt" type="script" suite="athenamp"> - <package_atn>Control/AthenaMP</package_atn> - <!-- 
<options_atn>chappy.py AthenaMP/tests/mp_genevt_test.py</options_atn> --> - <options_atn>python -c 'print "OK"'</options_atn> - <timelimit>30</timelimit> - <author> Sebastien Binet </author> - <mailto> binet@cern.ch </mailto> - <expectations> - <returnValue>0</returnValue> - </expectations> - </TEST> - -</atn> diff --git a/Reconstruction/RecJobTransforms/share/CommonRecoSkeletonJobOptions.py b/Reconstruction/RecJobTransforms/share/CommonRecoSkeletonJobOptions.py index 7325a901c361a0792c8f23431d5a7c31b84f2f27..122b14c211bc113786dde2d1d2852ae95facf268 100644 --- a/Reconstruction/RecJobTransforms/share/CommonRecoSkeletonJobOptions.py +++ b/Reconstruction/RecJobTransforms/share/CommonRecoSkeletonJobOptions.py @@ -62,4 +62,4 @@ if hasattr(runArgs, "valid") and runArgs.valid is True: # Avoid command line preInclude for event service if hasattr(runArgs, "eventService") and runArgs.eventService: - include('AthenaMP/AthenaMP_EventService.py') + import AthenaMP.EventService diff --git a/Simulation/SimuJobTransforms/share/CommonSkeletonJobOptions.py b/Simulation/SimuJobTransforms/share/CommonSkeletonJobOptions.py index 51257d2a0a5c94ebe16804991269bec136409cde..791a3031c5bdcd11f52723c528cce137d27dd870 100644 --- a/Simulation/SimuJobTransforms/share/CommonSkeletonJobOptions.py +++ b/Simulation/SimuJobTransforms/share/CommonSkeletonJobOptions.py @@ -35,7 +35,7 @@ if hasattr(runArgs,"beamType"): # Avoid command line preInclude for event service if hasattr(runArgs, "eventService") and runArgs.eventService: - include('AthenaMP/AthenaMP_EventService.py') + import AthenaMP.EventService ## autoConfiguration keywords triggering pre-defined functions ## if hasattr(runArgs,"autoConfiguration"): diff --git a/Tools/FullChainTransforms/share/FastChainSkeleton.EVGENtoRDO.py b/Tools/FullChainTransforms/share/FastChainSkeleton.EVGENtoRDO.py index 505f47e52d4cb523bea817c3612bdcdaa59515c8..093b50164225e1b87f7431eb9b374a2a08abf385 100644 --- a/Tools/FullChainTransforms/share/FastChainSkeleton.EVGENtoRDO.py +++ b/Tools/FullChainTransforms/share/FastChainSkeleton.EVGENtoRDO.py @@ -76,7 +76,7 @@ if hasattr(runArgs,"beamType"): # Avoid command line preInclude for event service if hasattr(runArgs, "eventService") and runArgs.eventService: - include('AthenaMP/AthenaMP_EventService.py') + import AthenaMP.EventService #####################Back to Skeleton.EVGENtoHIT.py###################### diff --git a/Tools/FullChainTransforms/share/skeleton.EVGENtoRDO.py b/Tools/FullChainTransforms/share/skeleton.EVGENtoRDO.py index 164ce11c7e119337aeadbf03d81ad32ab2ca0f31..f06ed74dc4777a14157b33afdf65f09c75962b22 100644 --- a/Tools/FullChainTransforms/share/skeleton.EVGENtoRDO.py +++ b/Tools/FullChainTransforms/share/skeleton.EVGENtoRDO.py @@ -116,7 +116,7 @@ if jobproperties.Beam.beamType.get_Value() != 'cosmics': # Avoid command line preInclude for event service if hasattr(runArgs, "eventService") and runArgs.eventService: - include('AthenaMP/AthenaMP_EventService.py') + import AthenaMP.EventService from ISF_Config.ISF_jobProperties import ISF_Flags if jobproperties.Beam.beamType.get_Value() == 'cosmics': diff --git a/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py b/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py index 237269adfef2cf6d884997d28fd8910bf515198a..8767b681f5c42cc8cc5fd34230e2e1efb95cc12e 100644 --- a/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py +++ b/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py @@ -33,7 +33,7 @@ if hasattr(runArgs, "preInclude"): # Avoid command line preInclude for Event Service if 
hasattr(runArgs, "eventService") and runArgs.eventService: - include('AthenaMP/AthenaMP_EventService.py') + import AthenaMP.EventService ## Post-include if hasattr(runArgs, "postInclude"):
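All five transform skeletons end up with the same guard: the Event Service configuration is pulled in as a module import instead of a jobOptions include. The resulting pattern, as it appears in each skeleton (runArgs is supplied by the transform framework at runtime):

    # Event Service guard shared by the transform skeletons after this change.
    if hasattr(runArgs, "eventService") and runArgs.eventService:
        import AthenaMP.EventService  # importing the module applies the configuration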