#!/usr/bin/python # # prologue.user script for Machine/Job Features on Torque/PBS # # Andrew McNab, University of Manchester. # Copyright (c) 2016. All rights reserved. # # Redistribution and use in source and binary forms, with or # without modification, are permitted provided that the following # conditions are met: # # o Redistributions of source code must retain the above # copyright notice, this list of conditions and the following # disclaimer. # o Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials # provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# Create $JOBFEATURES files following the Machine/Job Features
# specification in HSF-TN-2016-02
#
# This script creates the $JOBFEATURES directory at
# $PBS_O_HOME/jobfeatures-$PBS_JOBID and attempts to populate it
# from Torque/PBS information and from
# $MACHINEFEATURES=/etc/machinefeatures
#
# If the following variables are present in /var/run/mjf or
# /etc/sysconfig/mjf then they are preferred (a value in /var/run/mjf
# wins over one in /etc/sysconfig/mjf):
#
# - hs06_job
# - allocated_cpu
# - wall_limit_secs
# - cpu_limit_secs
# - max_rss_bytes
# - max_swap_bytes
# - scratch_limit_bytes
#

import errno
import os
import re
import sys
import time

# Locations consulted for site overrides, highest priority first
RUN_MJF_PATH = '/var/run/mjf'
SYSCONFIG_MJF_PATH = '/etc/sysconfig/mjf'

# $MACHINEFEATURES directory used for the pro-rata hs06_job calculation
MACHINEFEATURES_DIR = '/etc/machinefeatures'

# Job features that are plain integers and may be overridden by the mjf files
INTEGER_KEYS = ('allocated_cpu',
                'wall_limit_secs',
                'cpu_limit_secs',
                'max_rss_bytes',
                'max_swap_bytes',
                'scratch_limit_bytes')

# Multipliers for the suffix of a "mem=" request. Safer to assume powers
# of 1000 rather than 1024. A missing suffix is treated as bytes.
MEM_UNIT_BYTES = {'': 1,
                  'b': 1,
                  'kb': 1000,
                  'mb': 1000000,
                  'gb': 1000000000,
                  'tb': 1000000000000}


def parse_resource_limits(resources):
    """Extract job features from a Torque/PBS resource limits string.

    resources is the comma separated list handed to the prologue, for
    example "nodes=1:ppn=8,walltime=48:00:00,mem=4gb".

    Returns a dictionary holding whichever of 'allocated_cpu',
    'wall_limit_secs', 'cpu_limit_secs' and 'max_rss_bytes' could be
    parsed. Anything missing or malformed is simply left out.
    """
    limits = {}

    # if just 1 processor-per-node then "nodes=1", but if 8, say,
    # then "nodes=1:ppn=8"
    ppn = re.search(r'nodes=[0-9]*:ppn=([0-9]+)', resources)
    if ppn:
        limits['allocated_cpu'] = int(ppn.group(1))

    # The look-behind stops "cput=" being found inside "pcput=" (and
    # "mem=" inside "vmem="/"pmem="/"pvmem=" below)
    for resource, key in (('walltime', 'wall_limit_secs'),
                          ('cput', 'cpu_limit_secs')):
        hms = re.search(r'(?<![a-z])' + resource
                        + r'=([0-9]+):([0-9]+):([0-9]+)', resources)
        if hms:
            hours, minutes, seconds = [int(field) for field in hms.groups()]
            limits[key] = hours * 3600 + minutes * 60 + seconds

    mem = re.search(r'(?<![a-z])mem=([0-9]+)([a-z]*)', resources)
    if mem and mem.group(2) in MEM_UNIT_BYTES:
        limits['max_rss_bytes'] = int(mem.group(1)) * MEM_UNIT_BYTES[mem.group(2)]

    return limits


def lookup_override(key, sources, convert=int, value_pattern=r'[0-9]+'):
    """Return the first usable "key=value" found in sources, else None.

    sources is a sequence of file contents ordered from highest to
    lowest priority. Each text is searched separately, so an earlier
    source always wins and text from two files can never run together.
    A source whose value does not convert is skipped.
    """
    # The look-behind prevents matching the tail of a longer name
    pattern = re.compile(r'(?<![A-Za-z0-9_])' + re.escape(key)
                         + '=(' + value_pattern + ')')

    for text in sources:
        found = pattern.search(text)
        if found is None:
            continue

        try:
            return convert(found.group(1))
        except ValueError:
            continue

    return None


def read_text(path):
    """Return the contents of path, or '' if it cannot be read."""
    try:
        with open(path, 'r') as handle:
            return handle.read()
    except (IOError, OSError, ValueError):
        return ''


def read_number(path, convert):
    """Return convert(first line of path), or None if unavailable."""
    try:
        with open(path, 'r') as handle:
            return convert(handle.readline())
    except (IOError, OSError, ValueError):
        return None


def write_feature(directory, key, value):
    """Write str(value) to the file named key inside directory."""
    with open(directory + '/' + key, 'w') as handle:
        handle.write(str(value))


def main(argv=None, environ=None):
    """Create and populate the $JOBFEATURES directory for one job.

    argv defaults to sys.argv: argv[1] is the job ID and argv[5], if
    present, is the resource limits string. environ defaults to
    os.environ and must contain PBS_O_HOME.

    Raises KeyError/IndexError if PBS_O_HOME or the job ID is missing,
    and OSError/IOError if the directory or files cannot be written.
    """
    if argv is None:
        argv = sys.argv
    if environ is None:
        environ = os.environ

    home_dir = environ['PBS_O_HOME']
    job_id = argv[1]
    jobfeatures_dir = home_dir + '/jobfeatures-' + job_id

    try:
        os.mkdir(jobfeatures_dir)
    except OSError as error:
        # A requeued/restarted job may find its directory already there;
        # reuse it rather than failing the prologue
        if error.errno != errno.EEXIST or not os.path.isdir(jobfeatures_dir):
            raise

    write_feature(jobfeatures_dir, 'job_id', job_id)
    write_feature(jobfeatures_dir, 'jobstart_secs', int(time.time()))

    # Examine the 5th argument, for resource limits
    jobfeatures = {'allocated_cpu': 1}
    if len(argv) > 5:
        jobfeatures.update(parse_resource_limits(argv[5]))

    # Values in /var/run/mjf take precedence over the persistent
    # /etc/sysconfig/mjf
    sources = [read_text(RUN_MJF_PATH), read_text(SYSCONFIG_MJF_PATH)]

    # These are all integers so handle with a list
    for key in INTEGER_KEYS:
        override = lookup_override(key, sources)
        if override is not None:
            jobfeatures[key] = override

        if key in jobfeatures:
            write_feature(jobfeatures_dir, key, jobfeatures[key])

    # Try to get/calculate hs06_job
    hs06_job = lookup_override('hs06_job', sources, float, r'[0-9.]+')

    if not hs06_job:
        hs06 = read_number(MACHINEFEATURES_DIR + '/hs06', float)
        total_cpu = read_number(MACHINEFEATURES_DIR + '/total_cpu', int)

        if hs06 and total_cpu:
            # Simple pro-rata allocation of total hs06 depending on
            # processors for this job
            hs06_job = (jobfeatures['allocated_cpu'] * hs06) / total_cpu

    if hs06_job:
        # We got it from somewhere
        write_feature(jobfeatures_dir, 'hs06_job', '%.2f' % hs06_job)


if __name__ == '__main__':
    main()